diff --git a/DESCRIPTION b/DESCRIPTION index a3a11b8..8175bd0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ROMOPAPI Title: ROMOPAPI -Version: 2.2.0 +Version: 2.3.0 Authors@R: person("Javier", "Gracia-Tabuenca", , "javier.graciatabuenca@tuni.fi", role = c("aut", "cre"), comment = c(ORCID = "YOUR-ORCID-ID")) diff --git a/Dockerfile b/Dockerfile index 1f299cb..3075653 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,12 +24,15 @@ RUN apt-get update && apt-get install -y openjdk-8-jdk liblzma-dev libbz2-dev li ARG ROMOPAPI_BRANCH=main ARG BUILD_CACHE_BUSTER=5 +COPY . /opt/ROMOPAPI + # Install renv and restore packages RUN --mount=type=secret,id=build_github_pat \ cp /usr/local/lib/R/etc/Renviron /tmp/Renviron \ && echo "GITHUB_PAT=$(cat /run/secrets/build_github_pat)" >> /usr/local/lib/R/etc/Renviron \ && Rscript -e 'install.packages("remotes")' \ - && Rscript -e 'remotes::install_github("FINNGEN/ROMOPAPI@'$ROMOPAPI_BRANCH'")' \ + && Rscript -e "remotes::install_local('/opt/ROMOPAPI', upgrade = 'never', dependencies = TRUE)" \ + && Rscript -e "if (!requireNamespace('ROMOPAPI', quietly = TRUE)) stop('ROMOPAPI installation failed')" \ && cp /tmp/Renviron /usr/local/lib/R/etc/Renviron; # Expose the port that the API will run on diff --git a/NAMESPACE b/NAMESPACE index 2540e8c..3551511 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,6 +19,8 @@ export(getCodeCounts_memoise) export(getConceptsWithCodeCounts) export(getConceptsWithCodeCounts_memoise) export(getLogs) +export(getVisitTypeNames) +export(getVisitTypeNames_memoise) export(helper_FinnGen_getDatabaseFile) export(helper_FinnGen_getDatabaseFileCounts) export(helper_createSqliteDatabaseFromDatabase) diff --git a/NEWS.md b/NEWS.md index b5df3e2..ef021c2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,8 @@ -# ROMOPAPI +# ROMOPAPI 2.3.0 +- /getCodeCounts now returns one more column, `visit_group_concept_id`, in the `stratified_code_counts` table; it identifies the register the records come from +- New endpoint 
/getVisitTypeNames returns a table with the names and codes for `visit_group_concept_id`s + +# ROMOPAPI 2.2.0 - Added number_of_descendants to concepts with code counts - Added conceptId 21600744 to testing data diff --git a/R/createCodeCountsTables.R b/R/createCodeCountsTables.R index a343590..8e2cbf9 100644 --- a/R/createCodeCountsTables.R +++ b/R/createCodeCountsTables.R @@ -8,6 +8,7 @@ #' @param CDMdbHandler A CDMdbHandler object that contains database connection details #' @param domains Optional vector of domains to process. If NULL, processes all standard domains #' @param codeCountsTable Name of the table to create. Defaults to "code_counts" +#' @param visitSourceGroupConceptIds Optional vector of visit source group concept IDs to filter by. Defaults to 0 #' #' @return Nothing. Creates a table called 'code_counts' in the results schema with columns: #' \itemize{ @@ -35,14 +36,16 @@ #' \dontrun{ #' # Create code counts table for all domains #' createCodeCountsTable(CDMdbHandler) -#' +#' #' # Create code counts table for specific domains only #' createCodeCountsTable(CDMdbHandler, domains = c("Condition", "Drug")) #' } createCodeCountsTables <- function( CDMdbHandler, - domains = NULL, - codeCountsTable = "code_counts") { + domains = NULL, + codeCountsTable = "code_counts", + visitSourceGroupConceptIds = 0 +) { # # VALIDATE # @@ -58,13 +61,23 @@ createCodeCountsTables <- function( # - Create stratified code counts table stratifiedCodeCountsTable <- paste0("stratified_", codeCountsTable) - createStratifiedCodeCountsTable(CDMdbHandler, domains = domains, stratifiedCodeCountsTable = stratifiedCodeCountsTable) - + createStratifiedCodeCountsTable( + CDMdbHandler, + domains = domains, + stratifiedCodeCountsTable = stratifiedCodeCountsTable, + visitSourceGroupConceptIds = visitSourceGroupConceptIds + ) # - Create code counts table - sqlPath <- system.file("sql", "sql_server", "createCodeCountsTable.sql", package = "ROMOPAPI") + sqlPath <- system.file( + "sql", + 
"sql_server", + "createCodeCountsTable.sql", + package = "ROMOPAPI" + ) sql <- SqlRender::readSql(sqlPath) - sql <- SqlRender::render(sql, + sql <- SqlRender::render( + sql, cdmDatabaseSchema = cdmDatabaseSchema, resultsDatabaseSchema = resultsDatabaseSchema, codeCountsTable = codeCountsTable, diff --git a/R/createStratifiedCodeCountsTable.R b/R/createStratifiedCodeCountsTable.R index 59720d7..84f1ea8 100644 --- a/R/createStratifiedCodeCountsTable.R +++ b/R/createStratifiedCodeCountsTable.R @@ -8,6 +8,7 @@ #' @param CDMdbHandler A CDMdbHandler object that contains database connection details #' @param domains Optional data frame defining domains to process. If NULL, uses standard OMOP domains #' @param stratifiedCodeCountsTable Name of the stratified counts table to create. Defaults to "stratified_code_counts" +#' @param visitSourceGroupConceptIds Optional vector of visit source group concept IDs to filter by. Defaults to 0 #' #' @return Nothing. Creates a table called 'stratified_code_counts' in the results schema with columns: #' \itemize{ @@ -41,7 +42,9 @@ createStratifiedCodeCountsTable <- function( CDMdbHandler, domains = NULL, - stratifiedCodeCountsTable = "stratified_code_counts") { + stratifiedCodeCountsTable = "stratified_code_counts", + visitSourceGroupConceptIds = 0 + ) { # # VALIDATE # @@ -51,6 +54,7 @@ createStratifiedCodeCountsTable <- function( cdmDatabaseSchema <- CDMdbHandler$cdmDatabaseSchema resultsDatabaseSchema <- CDMdbHandler$resultsDatabaseSchema + if (is.null(domains)) { domains <- tibble::tribble( ~domain_id, ~table_name, ~concept_id_field, ~date_field, ~maps_to_concept_id_field, @@ -80,6 +84,7 @@ createStratifiedCodeCountsTable <- function( CREATE TABLE @resultsDatabaseSchema.@stratifiedCodeCountsTable ( concept_id INTEGER, maps_to_concept_id INTEGER, + visit_group_concept_id INTEGER, calendar_year INTEGER, gender_concept_id INTEGER, age_decile INTEGER, @@ -102,7 +107,8 @@ createStratifiedCodeCountsTable <- function( table_name = 
domain$table_name, concept_id_field = domain$concept_id_field, date_field = domain$date_field, - maps_to_concept_id_field = domain$maps_to_concept_id_field + maps_to_concept_id_field = domain$maps_to_concept_id_field, + visit_group_concept_ids = paste0(visitSourceGroupConceptIds, collapse = ", ") ) sql <- SqlRender::translate(sql, targetDialect = connection@dbms) diff --git a/R/getCodeCounts.R b/R/getCodeCounts.R index 0a8f34d..fca71a6 100644 --- a/R/getCodeCounts.R +++ b/R/getCodeCounts.R @@ -193,10 +193,10 @@ getCodeCounts <- function( codeCounts |> dplyr::select(-concept_id) |> dplyr::rename(concept_id = maps_to_concept_id) |> - dplyr::distinct(concept_id, calendar_year, gender_concept_id, age_decile, record_counts), + dplyr::distinct(concept_id, visit_group_concept_id, calendar_year, gender_concept_id, age_decile, record_counts), # standard concepts, agregate counts codeCounts |> dplyr::select(-maps_to_concept_id) |> - dplyr::group_by(concept_id, calendar_year, gender_concept_id, age_decile) |> + dplyr::group_by(concept_id, visit_group_concept_id, calendar_year, gender_concept_id, age_decile) |> dplyr::summarise(record_counts = sum(record_counts), .groups = "drop") ) |> # If concept maps to itself, bcs concept in concept and source concept columns, dont take it @@ -221,14 +221,14 @@ getCodeCounts <- function( nodeDescendantRecordCounts <- ancestorTableOfDescendant |> dplyr::inner_join(codeCountsPerId, by = c("descendant_concept_id" = "concept_id")) |> - dplyr::group_by(concept_id, calendar_year, gender_concept_id, age_decile) |> + dplyr::group_by(concept_id, visit_group_concept_id, calendar_year, gender_concept_id, age_decile) |> dplyr::summarise( descendant_record_counts = sum(record_counts), .groups = "drop" ) stratifiedCodeCounts <- codeCountsPerId |> - dplyr::full_join(nodeDescendantRecordCounts, by = c("concept_id", "calendar_year", "gender_concept_id", "age_decile")) |> + dplyr::full_join(nodeDescendantRecordCounts, by = c("concept_id", 
"visit_group_concept_id", "calendar_year", "gender_concept_id", "age_decile")) |> dplyr::mutate( descendant_record_counts = dplyr::if_else(is.na(descendant_record_counts), record_counts, descendant_record_counts), record_counts = dplyr::if_else(is.na(record_counts), 0, record_counts) diff --git a/R/getVisitTypeNames.R b/R/getVisitTypeNames.R new file mode 100644 index 0000000..3e98cc6 --- /dev/null +++ b/R/getVisitTypeNames.R @@ -0,0 +1,91 @@ +#' Get visit type names +#' +#' @description +#' Retrieves a list of visit group concept IDs used in the stratified code counts table +#' along with their associated concept names and codes. +#' +#' @param CDMdbHandler A CDMdbHandler object that contains database connection details +#' @param stratifiedCodeCountsTable Name of the stratified code counts table. Defaults to "stratified_code_counts" +#' +#' @return A tibble with columns: +#' \itemize{ +#' \item `visitGroupConceptId` - The visit group concept ID +#' \item `conceptCode` - The OMOP concept code +#' \item `conceptName` - The human-readable concept name +#' } +#' +#' @importFrom checkmate assertClass +#' @importFrom DatabaseConnector renderTranslateQuerySql +#' @importFrom tibble as_tibble +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' # Get visit type names +#' result <- getVisitTypeNames(CDMdbHandler) +#' } +getVisitTypeNames <- function( + CDMdbHandler, + stratifiedCodeCountsTable = "stratified_code_counts") { + # + # VALIDATE + # + CDMdbHandler |> checkmate::assertClass("CDMdbHandler") + + connection <- CDMdbHandler$connectionHandler$getConnection() + vocabularyDatabaseSchema <- CDMdbHandler$vocabularyDatabaseSchema + resultsDatabaseSchema <- CDMdbHandler$resultsDatabaseSchema + + ParallelLogger::logInfo("getVisitTypeNames: Getting visit type names") + + # + # FUNCTION + # + sql <- " + SELECT DISTINCT + scc.visit_group_concept_id AS visitGroupConceptId, + c.concept_code AS conceptCode, + c.concept_name AS conceptName + FROM 
@resultsDatabaseSchema.@stratifiedCodeCountsTable scc + INNER JOIN @vocabularyDatabaseSchema.concept c + ON scc.visit_group_concept_id = c.concept_id + WHERE scc.visit_group_concept_id != 0 + ORDER BY scc.visit_group_concept_id;" + + visitTypeNames <- DatabaseConnector::renderTranslateQuerySql( + connection = connection, + sql = sql, + vocabularyDatabaseSchema = vocabularyDatabaseSchema, + resultsDatabaseSchema = resultsDatabaseSchema, + stratifiedCodeCountsTable = stratifiedCodeCountsTable + ) |> + tibble::as_tibble() + + return(visitTypeNames) +} + +#' Memoised version of getVisitTypeNames +#' +#' @description +#' A memoised version of the getVisitTypeNames function that caches results to improve performance +#' for repeated calls with the same parameters. The CDMdbHandler argument is omitted from +#' the cache key to allow sharing across different database connections. +#' +#' @param CDMdbHandler A CDMdbHandler object that contains database connection details +#' @param stratifiedCodeCountsTable Name of the stratified code counts table. 
Defaults to "stratified_code_counts" +#' +#' @importFrom memoise memoise +#' +#' @return A tibble with columns: +#' \itemize{ +#' \item `visitGroupConceptId` - The visit group concept ID +#' \item `conceptCode` - The OMOP concept code +#' \item `conceptName` - The human-readable concept name +#' } +#' +#' @export +getVisitTypeNames_memoise <- memoise::memoise( + getVisitTypeNames, + omit_args = "CDMdbHandler" +) diff --git a/R/helper.R b/R/helper.R index 585b296..1e89059 100644 --- a/R/helper.R +++ b/R/helper.R @@ -99,7 +99,6 @@ helper_FinnGen_getDatabaseFileCounts <- function() { - #' Create SQLite database from CDM database #' #' @description @@ -160,13 +159,21 @@ helper_createSqliteDatabaseFromDatabase <- function( # Get concept table sql <- "SELECT DISTINCT c.* FROM @vocabularyDatabaseSchema.concept c - WHERE c.concept_id IN (@conceptIdsToExtract)" - + WHERE c.concept_id IN (@conceptIdsToExtract) + -- Include also the concepts in visit_group_concept_id in stratified_code_counts table + UNION + SELECT DISTINCT c.* FROM @vocabularyDatabaseSchema.concept c + JOIN @resultsDatabaseSchema.@stratifiedCodeCountsTable scc ON c.concept_id = scc.visit_group_concept_id + WHERE scc.concept_id IN (@conceptIdsToExtract) OR scc.maps_to_concept_id IN (@conceptIdsToExtract) + " + concept <- DatabaseConnector::renderTranslateQuerySql( connection = sourceConnection, sql = sql, vocabularyDatabaseSchema = sourceVocabularyDatabaseSchema, - conceptIdsToExtract = paste(conceptIdsToExtract, collapse = ",") + conceptIdsToExtract = paste(conceptIdsToExtract, collapse = ","), + resultsDatabaseSchema = sourceResultsDatabaseSchema, + stratifiedCodeCountsTable = paste0("stratified_", codeCountsTable) ) |> tibble::as_tibble() diff --git a/R/runApiServer.R b/R/runApiServer.R index 93293b6..b165d98 100644 --- a/R/runApiServer.R +++ b/R/runApiServer.R @@ -47,11 +47,11 @@ runApiServer <- function( ParallelLogger::logInfo("No path to database config provided. 
Using the test counts only database.") # if not provided, use the test counts only database test_databasesConfig <- HadesExtras_readAndParseYaml( - pathToYalmFile = system.file("testdata", "config", "onlyCounts_databasesConfig.yml", package = "ROMOPAPI"), + pathToYalmFile = system.file("testdata", "config", "databasesConfig.yml", package = "ROMOPAPI"), pathToFinnGenCountsSqlite = helper_FinnGen_getDatabaseFileCounts() ) - cohortTableHandlerConfig <- test_databasesConfig[[1]]$cohortTableHandler + cohortTableHandlerConfig <- test_databasesConfig$FC$cohortTableHandler # Create CDMdbHandler CDMdbHandler <- HadesExtras_createCDMdbHandlerFromList(cohortTableHandlerConfig, loadConnectionChecksLevel = "basicChecks") diff --git a/inst/plumber/plumber.R b/inst/plumber/plumber.R index 2db77da..fdd26ed 100644 --- a/inst/plumber/plumber.R +++ b/inst/plumber/plumber.R @@ -125,4 +125,10 @@ function(res, feedback = "") { sendFeedback(feedback) res$status <- 200 return(list(message = "Feedback sent")) +} + +#* Get the list of visit type names +#* @get /getVisitTypeNames +function() { + getVisitTypeNames_memoise(CDMdbHandler = CDMdbHandler) } \ No newline at end of file diff --git a/inst/sql/sql_server/appendToStratrifiedCodeCountsTable.sql b/inst/sql/sql_server/appendToStratrifiedCodeCountsTable.sql index 7026768..03e56cc 100644 --- a/inst/sql/sql_server/appendToStratrifiedCodeCountsTable.sql +++ b/inst/sql/sql_server/appendToStratrifiedCodeCountsTable.sql @@ -1,10 +1,11 @@ -- Insert into code_stratified_counts table INSERT INTO @resultsDatabaseSchema.@stratifiedCodeCountsTable --- calculate counts per each group of concept_id, calendar_year, gender_concept_id, age_decil +-- calculate counts per each group of concept_id, calendar_year, gender_concept_id, age_decil, visit_group_concept_id SELECT CAST(ccm.concept_id AS BIGINT) AS concept_id, CAST(ccm.maps_to_concept_id AS BIGINT) AS maps_to_concept_id, + CAST(ccm.visit_group_concept_id AS BIGINT) AS visit_group_concept_id, 
CAST(ccm.calendar_year AS BIGINT) AS calendar_year, CAST(ccm.gender_concept_id AS BIGINT) AS gender_concept_id, CAST(ccm.age_decile AS BIGINT) AS age_decile, @@ -13,13 +14,17 @@ FROM ( -- get all person_ids with the concept_id with in a valid observation period -- calculate the calendar year, gender_concept_id, age_decile -- calculate the min_calendar_year, used to find the first event in history per code and person + -- if visit_group_concept_ids are provided, derive visit_group_concept_id from the given groups through concept_ancestor, + -- if the visit_source_concept_id of the visit falls under none of the given groups, keep it as visit_group_concept_id + -- NOTE(review): events without a visit_occurrence_id or visit_source_concept_id are meant to get 0, but the COALESCE below can yield NULL for them - confirm SELECT p.person_id AS person_id, t.@concept_id_field AS concept_id, t.@maps_to_concept_id_field AS maps_to_concept_id, YEAR(t.@date_field) AS calendar_year, p.gender_concept_id AS gender_concept_id, - FLOOR((YEAR(t.@date_field) - p.year_of_birth) / 10) AS age_decile + FLOOR((YEAR(t.@date_field) - p.year_of_birth) / 10) AS age_decile, + {@visit_group_concept_ids != 0} ? 
{COALESCE(vmap.visit_group_concept_id, vo.visit_source_concept_id)} : {0} AS visit_group_concept_id FROM @cdmDatabaseSchema.person p JOIN @@ -34,6 +39,24 @@ FROM ( t.@date_field >= op.observation_period_start_date AND t.@date_field <= op.observation_period_end_date +{@visit_group_concept_ids != 0}?{ + LEFT JOIN + @cdmDatabaseSchema.visit_occurrence vo + ON + t.visit_occurrence_id = vo.visit_occurrence_id + LEFT JOIN ( + SELECT + ca.ancestor_concept_id AS visit_group_concept_id, + ca.descendant_concept_id AS visit_source_concept_id + FROM + @cdmDatabaseSchema.concept_ancestor ca + WHERE + ca.ancestor_concept_id IN (@visit_group_concept_ids) + + ) AS vmap + ON + vo.visit_source_concept_id = vmap.visit_source_concept_id +} WHERE t.@concept_id_field != 0 ) ccm @@ -42,4 +65,5 @@ GROUP BY ccm.maps_to_concept_id, ccm.calendar_year, ccm.gender_concept_id, - ccm.age_decile \ No newline at end of file + ccm.age_decile, + ccm.visit_group_concept_id; \ No newline at end of file diff --git a/inst/sql/sql_server/createCodeCountsTable.sql b/inst/sql/sql_server/createCodeCountsTable.sql index d1d7845..ad3119f 100644 --- a/inst/sql/sql_server/createCodeCountsTable.sql +++ b/inst/sql/sql_server/createCodeCountsTable.sql @@ -43,7 +43,7 @@ temp_concept_ancestor AS ( SUM(record_counts) AS record_counts FROM ( SELECT DISTINCT - maps_to_concept_id, calendar_year, gender_concept_id, age_decile, record_counts + maps_to_concept_id, visit_group_concept_id, calendar_year, gender_concept_id, age_decile, record_counts FROM @resultsDatabaseSchema.@stratifiedCodeCountsTable -- do not take if maps_to_concept_id is a standard concept WHERE concept_id != maps_to_concept_id diff --git a/inst/testdata/config/atlasDev_databasesConfig.yml b/inst/testdata/config/atlasDev_databasesConfig.yml deleted file mode 100644 index 335ccac..0000000 --- a/inst/testdata/config/atlasDev_databasesConfig.yml +++ /dev/null @@ -1,42 +0,0 @@ -BQ5k: - cohortTableHandler: - database: - databaseId: BQ5k - databaseName: 
bigquery5k - databaseDescription: BigQuery database - connection: - connectionDetailsSettings: - dbms: bigquery - drv: bigrquery::bigquery() - project: atlas-development-270609 - billing: atlas-development-270609 - bigint: integer64 - tempEmulationSchema: atlas-development-270609.sandbox - cdm: - cdmDatabaseSchema: atlas-development-270609.finngen_omop_r13_v3_5k - vocabularyDatabaseSchema: atlas-development-270609.finngen_omop_r13_v3_5k - resultsDatabaseSchema: atlas-development-270609.sandbox - cohortTable: - cohortDatabaseSchema: atlas-development-270609.sandbox - cohortTableName: hadesextras_test_cohort_table_ -BQ500k: - cohortTableHandler: - database: - databaseId: BQ500k - databaseName: bigquery500k - databaseDescription: BigQuery database with 500k patients - connection: - connectionDetailsSettings: - dbms: bigquery - drv: bigrquery::bigquery() - project: atlas-development-270609 - billing: atlas-development-270609 - bigint: integer64 - tempEmulationSchema: atlas-development-270609.sandbox - cdm: - cdmDatabaseSchema: atlas-development-270609.etl_sam_dev_omop - vocabularyDatabaseSchema: atlas-development-270609.etl_sam_dev_omop - resultsDatabaseSchema: atlas-development-270609.sandbox - cohortTable: - cohortDatabaseSchema: atlas-development-270609.sandbox - cohortTableName: hadesextras_test_cohort_table_ diff --git a/inst/testdata/config/databasesConfig.yml b/inst/testdata/config/databasesConfig.yml new file mode 100644 index 0000000..4a591be --- /dev/null +++ b/inst/testdata/config/databasesConfig.yml @@ -0,0 +1,150 @@ +# Eunomia-GiBleed +E1: + cohortTableHandler: + database: + databaseId: E1 + databaseName: GiBleed + databaseDescription: Eunomia database GiBleed + connection: + connectionDetailsSettings: + dbms: sqlite + server: + cdm: + cdmDatabaseSchema: main + vocabularyDatabaseSchema: main + resultsDatabaseSchema: main + cohortTable: + cohortDatabaseSchema: main + cohortTableName: test_cohort_table_ + atlasConfig: + webapiurl: 
https://api.ohdsi.org/WebAPI + sourcekey: + resultsshchema: +# Eunomia-MIMIC +E2: + cohortTableHandler: + database: + databaseId: E2 + databaseName: MIMIC + databaseDescription: Eunomia database MIMIC + connection: + connectionDetailsSettings: + dbms: sqlite + server: + cdm: + cdmDatabaseSchema: main + vocabularyDatabaseSchema: main + resultsDatabaseSchema: main + cohortTable: + cohortDatabaseSchema: main + cohortTableName: test_cohort_table_ + atlasConfig: + webapiurl: https://api.ohdsi.org/WebAPI + sourcekey: + resultsshchema: +# Eunomia-MIMICwrong +E3: + cohortTableHandler: + database: + databaseId: E3 + databaseName: MIMICwrong + databaseDescription: Eunomia database MIMIC wrong atlas + connection: + connectionDetailsSettings: + dbms: sqlite + server: + cdm: + cdmDatabaseSchema: main + vocabularyDatabaseSchema: main + resultsDatabaseSchema: main + cohortTable: + cohortDatabaseSchema: main + cohortTableName: test_cohort_table_ + atlasConfig: + webapiurl: https://api.ohdsi.org/wrong + sourcekey: + resultsshchema: +# Eunomia-FinnGen +E4: + cohortTableHandler: + database: + databaseId: E4 + databaseName: FinnGen + databaseDescription: Eunomia database FinnGen + connection: + connectionDetailsSettings: + dbms: sqlite + server: + cdm: + cdmDatabaseSchema: main + vocabularyDatabaseSchema: main + resultsDatabaseSchema: main + cohortTable: + cohortDatabaseSchema: main + cohortTableName: test_cohort_table_ + atlasConfig: + webapiurl: https://api.ohdsi.org/wrong + sourcekey: + resultsshchema: +# AtlasDevelopment-5k +BQ5K: + cohortTableHandler: + database: + databaseId: BQ5K + databaseName: bigquery5k + databaseDescription: BigQuery database + connection: + connectionDetailsSettings: + dbms: bigquery + drv: bigrquery::bigquery() + project: atlas-development-270609 + billing: atlas-development-270609 + bigint: integer64 + tempEmulationSchema: atlas-development-270609.sandbox + cdm: + cdmDatabaseSchema: atlas-development-270609.finngen_omop_dev_5k + vocabularyDatabaseSchema: 
atlas-development-270609.finngen_omop_dev_5k + resultsDatabaseSchema: atlas-development-270609.finngen_omop_results_dev_5k + cohortTable: + cohortDatabaseSchema: atlas-development-270609.sandbox + cohortTableName: hadesextras_test_cohort_table_ +# AtlasDevelopment-full +BQfull: + cohortTableHandler: + database: + databaseId: BQ + databaseName: bigquery + databaseDescription: BigQuery database with 500k patients + connection: + connectionDetailsSettings: + dbms: bigquery + drv: bigrquery::bigquery() + project: atlas-development-270609 + billing: atlas-development-270609 + bigint: integer64 + tempEmulationSchema: atlas-development-270609.sandbox + cdm: + cdmDatabaseSchema: atlas-development-270609.finngen_omop_dev + vocabularyDatabaseSchema: atlas-development-270609.finngen_omop_dev + resultsDatabaseSchema: atlas-development-270609.finngen_omop_results_dev + cohortTable: + cohortDatabaseSchema: atlas-development-270609.sandbox + cohortTableName: hadesextras_test_cohort_table_ +# FinnGenCounts +FC: + cohortTableHandler: + database: + databaseId: FC + databaseName: FinnGenCounts + databaseDescription: Counts only FinnGen + connection: + connectionDetailsSettings: + dbms: sqlite + server: + cdm: + cdmDatabaseSchema: main + vocabularyDatabaseSchema: main + resultsDatabaseSchema: main + cohortTable: + cohortDatabaseSchema: main + cohortTableName: test_cohort_table_ diff --git a/inst/testdata/config/eunomia_databasesConfig.yml b/inst/testdata/config/eunomia_databasesConfig.yml deleted file mode 100644 index 081f95a..0000000 --- a/inst/testdata/config/eunomia_databasesConfig.yml +++ /dev/null @@ -1,60 +0,0 @@ -E1: - cohortTableHandler: - database: - databaseId: E1 - databaseName: GiBleed - databaseDescription: Eunomia database GiBleed - connection: - connectionDetailsSettings: - dbms: sqlite - server: - cdm: - cdmDatabaseSchema: main - vocabularyDatabaseSchema: main - cohortTable: - cohortDatabaseSchema: main - cohortTableName: test_cohort_table_ - atlasConfig: - webapiurl: 
https://api.ohdsi.org/WebAPI - sourcekey: - resultsshchema: -E2: - cohortTableHandler: - database: - databaseId: E2 - databaseName: MIMIC - databaseDescription: Eunomia database MIMIC - connection: - connectionDetailsSettings: - dbms: sqlite - server: - cdm: - cdmDatabaseSchema: main - vocabularyDatabaseSchema: main - cohortTable: - cohortDatabaseSchema: main - cohortTableName: test_cohort_table_ - atlasConfig: - webapiurl: https://api.ohdsi.org/WebAPI - sourcekey: - resultsshchema: -F1: - cohortTableHandler: - database: - databaseId: F1 - databaseName: FinnGen - databaseDescription: Eunomia database FinnGen - connection: - connectionDetailsSettings: - dbms: sqlite - server: - cdm: - cdmDatabaseSchema: main - vocabularyDatabaseSchema: main - cohortTable: - cohortDatabaseSchema: main - cohortTableName: test_cohort_table_ - atlasConfig: - webapiurl: https://api.ohdsi.org/WebAPI - sourcekey: - resultsshchema: diff --git a/inst/testdata/config/onlyCounts_databasesConfig.yml b/inst/testdata/config/onlyCounts_databasesConfig.yml deleted file mode 100644 index c1b0238..0000000 --- a/inst/testdata/config/onlyCounts_databasesConfig.yml +++ /dev/null @@ -1,20 +0,0 @@ -FC: - cohortTableHandler: - database: - databaseId: FC - databaseName: FinnGenCounts - databaseDescription: Counts only FinnGen - connection: - connectionDetailsSettings: - dbms: sqlite - server: - cdm: - cdmDatabaseSchema: main - vocabularyDatabaseSchema: main - cohortTable: - cohortDatabaseSchema: main - cohortTableName: test_cohort_table_ - atlasConfig: - webapiurl: https://api.ohdsi.org/WebAPI - sourcekey: - resultsshchema: diff --git a/inst/testdata/data/FinnGenR13_countsOnly.sqlite b/inst/testdata/data/FinnGenR13_countsOnly.sqlite index 000c65f..cbde4ca 100644 Binary files a/inst/testdata/data/FinnGenR13_countsOnly.sqlite and b/inst/testdata/data/FinnGenR13_countsOnly.sqlite differ diff --git a/inst/testdata/data/createTestingData.R b/inst/testdata/data/createTestingData.R index 6761449..c223f98 100644 
--- a/inst/testdata/data/createTestingData.R +++ b/inst/testdata/data/createTestingData.R @@ -1,5 +1,5 @@ # Get connection -Sys.setenv(HADESEXTAS_TESTING_ENVIRONMENT = "AtlasDevelopment-DBI") +Sys.setenv(HADESEXTAS_TESTING_ENVIRONMENT = "AtlasDevelopment-full") Sys.setenv(BUILD_COUNTS_TABLE = "FALSE") source("tests/testthat/setup.R") @@ -9,13 +9,45 @@ conceptIds <- c( 45596282, # ICD10: Asthma 21601855, # ATC level 4: C10AA (Statins) 320136, # Big graph, parent of Asthma snomed concept (Disorders of the respiratory system) - 4024567,# biger + 4024567, # biger 21600744 # bug in plot ) -CDMdbHandler <- HadesExtras_createCDMdbHandlerFromList(test_cohortTableHandlerConfig, loadConnectionChecksLevel = "basicChecks") +CDMdbHandler <- HadesExtras_createCDMdbHandlerFromList( + test_cohortTableHandlerConfig, + loadConnectionChecksLevel = "basicChecks" +) # uncomment to create code counts tables -createCodeCountsTables(CDMdbHandler) + +visitSourceGroupConceptIds = c( + # longitudinal + 2002330246, # INPAT + 2002330247, # OPER_IN + 2002330248, # OPER_OUT + 2002330249, # OUTPAT + 2002330250, # PRIM_OUT + 2002330102, # REIM + 2002330104, # DEATH + 2002330101, # PURCH + 2002330103, # CANC + # registers + 2002330245, # KANTA + 2002330106, # BIOBANK + 2002330186, # KIDNEY + 2002330119, # VISION + 2002330105, # BIRTH_MOTHER + # Drugs + 2002330251, # PRESCRIPTION + 2002330252, # DELIVERY + 2002330253, # PRESCRIPTION_DELIVERY + 2002330254, # DELIVERY_KELA + 2002330255 # PRESCRIPTION_DELIVERY_KELA +) + +createCodeCountsTables( + CDMdbHandler, + visitSourceGroupConceptIds = visitSourceGroupConceptIds +) helper_createSqliteDatabaseFromDatabase( CDMdbHandler, conceptIds = conceptIds, @@ -24,11 +56,20 @@ helper_createSqliteDatabaseFromDatabase( # Test -connection <- DatabaseConnector::connect(DatabaseConnector::createConnectionDetails(dbms = "sqlite", server = "inst/testdata/data/FinnGenR13_countsOnly.sqlite")) +connection <- 
DatabaseConnector::connect(DatabaseConnector::createConnectionDetails( + dbms = "sqlite", + server = "inst/testdata/data/FinnGenR13_countsOnly.sqlite" +)) -DatabaseConnector::dbListTables(connection) |> +DatabaseConnector::dbListTables(connection) |> sort() |> - expect_equal(c("cdm_source", "code_counts", "concept", "concept_ancestor", "stratified_code_counts")) + expect_equal(c( + "cdm_source", + "code_counts", + "concept", + "concept_ancestor", + "stratified_code_counts" + )) dplyr::tbl(connection, "concept") |> dplyr::count() |> @@ -55,3 +96,13 @@ dplyr::tbl(connection, "cdm_source") |> dplyr::pull(n) |> expect_gt(0) +# visit_group_concept_id +dplyr::tbl(connection, "stratified_code_counts") |> + count(visit_group_concept_id) |> + left_join(dplyr::tbl(connection, "concept"), by=c("visit_group_concept_id"="concept_id")) |> + print(n =2122) + +dplyr::tbl(connection, "stratified_code_counts") |> + dplyr::distinct(visit_group_concept_id) |> + dplyr::pull(visit_group_concept_id) |> + (\(x) expect_false(all(x %in% visitSourceGroupConceptIds)))() diff --git a/llms.md b/llms.md new file mode 100644 index 0000000..f48fa62 --- /dev/null +++ b/llms.md @@ -0,0 +1,37 @@ + +# R Package Development Rules + +## Pipe Operator +- **Always use `|>` (native pipe) instead of `%>%` (magrittr pipe)** + +## Roxygen Documentation +- **Use Roxygen2 for all function documentation** +- **Always include `@importFrom` directives** for functions imported from other packages +- **Do NOT include `@examples` sections** in Roxygen comments +- Standard format: `@title`, `@description`, `@param`, `@return`, `@importFrom package function` +- `@export` is only used for functions that are intended to be used by the user. Internal functions should not be exported and must start with a dot. 
+ +## Package Preferences +- **Prefer tidyverse functions over base R** when possible +- Use `dplyr`, `readr`, `tibble`, `stringr`, `purrr` instead of base R equivalents +- Use base R when it's more appropriate or when tidyverse adds unnecessary dependencies + + +## Code Style +- Use **camelCase**: function and variable names start with lowercase, package names start with uppercase +- Function names are verbs, variable names are nouns (e.g., `fitModel`, `population`) +- Use `<-` for assignment, not `=` +- Place spaces around infix operators (`=`, `+`, `-`, `<-`, etc.) and after commas +- Always use curly braces `{}` for if-then-else, even single statements +- Use named arguments when calling functions with more than one argument +- Pipes (`|>`) should be at the end of the line +- Limit lines to 100 characters +- Comments explain *why*, not *what*; use `#` with a space + + +# Project context + +This project is an API built with Plumber to provide certain data from a database. +The database is an OMOP Common Data Model. +Use the OMOP CDM tables and columns in the SQL calls. +The SQL follows the Hades SQL flavour. \ No newline at end of file diff --git a/man/createCodeCountsTables.Rd b/man/createCodeCountsTables.Rd index c3390fa..5274007 100644 --- a/man/createCodeCountsTables.Rd +++ b/man/createCodeCountsTables.Rd @@ -7,7 +7,8 @@ createCodeCountsTables( CDMdbHandler, domains = NULL, - codeCountsTable = "code_counts" + codeCountsTable = "code_counts", + visitSourceGroupConceptIds = 0 ) } \arguments{ @@ -16,6 +17,8 @@ createCodeCountsTables( \item{domains}{Optional vector of domains to process. If NULL, processes all standard domains} \item{codeCountsTable}{Name of the table to create. Defaults to "code_counts"} + +\item{visitSourceGroupConceptIds}{Optional vector of visit source group concept IDs to filter by. Defaults to 0} } \value{ Nothing. 
Creates a table called 'code_counts' in the results schema with columns: diff --git a/man/createStratifiedCodeCountsTable.Rd b/man/createStratifiedCodeCountsTable.Rd index d78642f..0d22548 100644 --- a/man/createStratifiedCodeCountsTable.Rd +++ b/man/createStratifiedCodeCountsTable.Rd @@ -7,7 +7,8 @@ createStratifiedCodeCountsTable( CDMdbHandler, domains = NULL, - stratifiedCodeCountsTable = "stratified_code_counts" + stratifiedCodeCountsTable = "stratified_code_counts", + visitSourceGroupConceptIds = 0 ) } \arguments{ @@ -16,6 +17,8 @@ createStratifiedCodeCountsTable( \item{domains}{Optional data frame defining domains to process. If NULL, uses standard OMOP domains} \item{stratifiedCodeCountsTable}{Name of the stratified counts table to create. Defaults to "stratified_code_counts"} + +\item{visitSourceGroupConceptIds}{Optional vector of visit source group concept IDs to filter by. Defaults to 0} } \value{ Nothing. Creates a table called 'stratified_code_counts' in the results schema with columns: diff --git a/man/getVisitTypeNames.Rd b/man/getVisitTypeNames.Rd new file mode 100644 index 0000000..242e90c --- /dev/null +++ b/man/getVisitTypeNames.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/getVisitTypeNames.R +\name{getVisitTypeNames} +\alias{getVisitTypeNames} +\title{Get visit type names} +\usage{ +getVisitTypeNames( + CDMdbHandler, + stratifiedCodeCountsTable = "stratified_code_counts" +) +} +\arguments{ +\item{CDMdbHandler}{A CDMdbHandler object that contains database connection details} + +\item{stratifiedCodeCountsTable}{Name of the stratified code counts table. 
Defaults to "stratified_code_counts"} +} +\value{ +A tibble with columns: +\itemize{ +\item \code{visitGroupConceptId} - The visit group concept ID +\item \code{conceptCode} - The OMOP concept code +\item \code{conceptName} - The human-readable concept name +} +} +\description{ +Retrieves a list of visit group concept IDs used in the stratified code counts table +along with their associated concept names and codes. +} +\examples{ +\dontrun{ +# Get visit type names +result <- getVisitTypeNames(CDMdbHandler) +} +} diff --git a/man/getVisitTypeNames_memoise.Rd b/man/getVisitTypeNames_memoise.Rd new file mode 100644 index 0000000..bed4692 --- /dev/null +++ b/man/getVisitTypeNames_memoise.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/getVisitTypeNames.R +\name{getVisitTypeNames_memoise} +\alias{getVisitTypeNames_memoise} +\title{Memoised version of getVisitTypeNames} +\usage{ +getVisitTypeNames_memoise( + CDMdbHandler, + stratifiedCodeCountsTable = "stratified_code_counts" +) +} +\arguments{ +\item{CDMdbHandler}{A CDMdbHandler object that contains database connection details} + +\item{stratifiedCodeCountsTable}{Name of the stratified code counts table. Defaults to "stratified_code_counts"} +} +\value{ +A tibble with columns: +\itemize{ +\item \code{visitGroupConceptId} - The visit group concept ID +\item \code{conceptCode} - The OMOP concept code +\item \code{conceptName} - The human-readable concept name +} +} +\description{ +A memoised version of the getVisitTypeNames function that caches results to improve performance +for repeated calls with the same parameters. The CDMdbHandler argument is omitted from +the cache key to allow sharing across different database connections. 
+} diff --git a/tests/testmanual/manualtest-runApiServer.R b/tests/testmanual/manualtest-runApiServer.R index 5a91564..c3a24cd 100644 --- a/tests/testmanual/manualtest-runApiServer.R +++ b/tests/testmanual/manualtest-runApiServer.R @@ -1,9 +1,7 @@ -databaseConfig <- yaml::read_yaml("inst/testdata/config/onlyCounts_databasesConfig.yml") - ROMOPAPI::runApiServer( - cohortTableHandlerConfig = databaseConfig$cohortTableHandler, + cohortTableHandlerConfig = NULL, buildCountsTable = FALSE ) diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index dce4805..c825629 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -3,14 +3,16 @@ # # Sys.setenv(HADESEXTAS_TESTING_ENVIRONMENT = "Eunomia-GiBleed") -# Sys.setenv(HADESEXTAS_TESTING_ENVIRONMENT = "AtlasDevelopment-DBI") +# Sys.setenv(HADESEXTAS_TESTING_ENVIRONMENT = "Eunomia-MIMIC") # Sys.setenv(HADESEXTAS_TESTING_ENVIRONMENT = "Eunomia-FinnGen") +# Sys.setenv(HADESEXTAS_TESTING_ENVIRONMENT = "AtlasDevelopment-5k") +# Sys.setenv(HADESEXTAS_TESTING_ENVIRONMENT = "AtlasDevelopment-full") # Sys.setenv(HADESEXTAS_TESTING_ENVIRONMENT = "OnlyCounts-FinnGen") testingDatabase <- Sys.getenv("HADESEXTAS_TESTING_ENVIRONMENT") buildCountsTable <- Sys.getenv("BUILD_COUNTS_TABLE") # check correct settings -possibleDatabases <- c("Eunomia-GiBleed", "Eunomia-MIMIC", "AtlasDevelopment-DBI", "Eunomia-FinnGen", "OnlyCounts-FinnGen") +possibleDatabases <- c("Eunomia-GiBleed", "Eunomia-MIMIC", "Eunomia-FinnGen", "AtlasDevelopment-5k", "AtlasDevelopment-full", "OnlyCounts-FinnGen") if (!(testingDatabase %in% possibleDatabases)) { message("Please set a valid testing environment in envar HADESEXTAS_TESTING_ENVIRONMENT, from: ", paste(possibleDatabases, collapse = ", ")) stop() @@ -26,10 +28,10 @@ if (! 
buildCountsTable %in% c("TRUE", "FALSE")) { # if (testingDatabase |> stringr::str_starts("OnlyCounts-FinnGen")) { test_databasesConfig <- HadesExtras_readAndParseYaml( - pathToYalmFile = system.file("testdata", "config", "onlyCounts_databasesConfig.yml", package = "ROMOPAPI"), + pathToYalmFile = system.file("testdata", "config", "databasesConfig.yml", package = "ROMOPAPI"), pathToFinnGenCountsSqlite = helper_FinnGen_getDatabaseFileCounts() ) - test_cohortTableHandlerConfig <- test_databasesConfig[[1]]$cohortTableHandler + test_cohortTableHandlerConfig <- test_databasesConfig$FC$cohortTableHandler buildCountsTable <- "FALSE" } @@ -43,34 +45,32 @@ if (testingDatabase |> stringr::str_starts("Eunomia")) { stop() } - pathToGiBleedEunomiaSqlite <- "" - pathToMIMICEunomiaSqlite <- "" + pathToGiBleedEunomiaSqlite <- Eunomia::getDatabaseFile("GiBleed", overwrite = FALSE) + pathToMIMICEunomiaSqlite <- Eunomia::getDatabaseFile("MIMIC", overwrite = FALSE) + pathToFinnGenEunomiaSqlite <- "" - if (testingDatabase |> stringr::str_ends("GiBleed")) { - pathToGiBleedEunomiaSqlite <- Eunomia::getDatabaseFile("GiBleed", overwrite = FALSE) - } - if (testingDatabase |> stringr::str_ends("MIMIC")) { - pathToMIMICEunomiaSqlite <- Eunomia::getDatabaseFile("MIMIC", overwrite = FALSE) - } if (testingDatabase |> stringr::str_ends("FinnGen")) { pathToFinnGenEunomiaSqlite <- helper_FinnGen_getDatabaseFile() } test_databasesConfig <- HadesExtras_readAndParseYaml( - pathToYalmFile = system.file("testdata", "config", "eunomia_databasesConfig.yml", package = "ROMOPAPI"), + pathToYalmFile = system.file("testdata", "config", "databasesConfig.yml", package = "ROMOPAPI"), pathToGiBleedEunomiaSqlite = pathToGiBleedEunomiaSqlite, pathToMIMICEunomiaSqlite = pathToMIMICEunomiaSqlite, pathToFinnGenEunomiaSqlite = pathToFinnGenEunomiaSqlite ) if (testingDatabase |> stringr::str_ends("GiBleed")) { - test_cohortTableHandlerConfig <- test_databasesConfig[[1]]$cohortTableHandler + test_cohortTableHandlerConfig 
<- test_databasesConfig$E1$cohortTableHandler } if (testingDatabase |> stringr::str_ends("MIMIC")) { - test_cohortTableHandlerConfig <- test_databasesConfig[[2]]$cohortTableHandler + test_cohortTableHandlerConfig <- test_databasesConfig$E2$cohortTableHandler } if (testingDatabase |> stringr::str_ends("FinnGen")) { - test_cohortTableHandlerConfig <- test_databasesConfig[[3]]$cohortTableHandler + test_cohortTableHandlerConfig <- test_databasesConfig$E3$cohortTableHandler + } + if (testingDatabase |> stringr::str_ends("FinnGen")) { + test_cohortTableHandlerConfig <- test_databasesConfig$E4$cohortTableHandler } } @@ -78,7 +78,7 @@ if (testingDatabase |> stringr::str_starts("Eunomia")) { # # AtlasDevelopmet-DBI Database # -if (testingDatabase %in% c("AtlasDevelopment-DBI")) { +if (testingDatabase |> stringr::str_starts("AtlasDevelopment")) { if (Sys.getenv("GCP_SERVICE_KEY") == "") { message("GCP_SERVICE_KEY not set. Please set this environment variable to the path of the GCP service key.") stop() @@ -87,14 +87,18 @@ if (testingDatabase %in% c("AtlasDevelopment-DBI")) { bigrquery::bq_auth(path = Sys.getenv("GCP_SERVICE_KEY")) test_databasesConfig <- HadesExtras_readAndParseYaml( - pathToYalmFile = system.file("testdata", "config", "atlasDev_databasesConfig.yml", package = "ROMOPAPI") + pathToYalmFile = system.file("testdata", "config", "databasesConfig.yml", package = "ROMOPAPI") ) - test_cohortTableHandlerConfig <- test_databasesConfig[[2]]$cohortTableHandler + if (testingDatabase |> stringr::str_ends("5k")) { + test_cohortTableHandlerConfig <- test_databasesConfig$BQ5K$cohortTableHandler + } + if (testingDatabase |> stringr::str_ends("full")) { + test_cohortTableHandlerConfig <- test_databasesConfig$BQfull$cohortTableHandler + } } - # # INFORM USER # diff --git a/tests/testthat/test-createCodeCountsTable.R b/tests/testthat/test-createCodeCountsTable.R index 18af03c..bb1e634 100644 --- a/tests/testthat/test-createCodeCountsTable.R +++ 
b/tests/testthat/test-createCodeCountsTable.R @@ -1,8 +1,11 @@ test_that("createStratifiedCodeCountsTable works with duplicated counts", { # only works in a full CDM database - skip_if(testingDatabase == "OnlyCounts-FinnGen") + skip_if(testingDatabase != "AtlasDevelopment-5k") - CDMdbHandler <- HadesExtras_createCDMdbHandlerFromList(test_cohortTableHandlerConfig, loadConnectionChecksLevel = "basicChecks") + CDMdbHandler <- HadesExtras_createCDMdbHandlerFromList( + test_cohortTableHandlerConfig, + loadConnectionChecksLevel = "basicChecks" + ) withr::defer({ CDMdbHandler <- NULL gc() @@ -12,27 +15,141 @@ test_that("createStratifiedCodeCountsTable works with duplicated counts", { resultsDatabaseSchema <- CDMdbHandler$resultsDatabaseSchema withr::defer({ - CDMdbHandler$connectionHandler$executeSql(paste0("DROP TABLE ", resultsDatabaseSchema, ".", stratifiedCodeCountsTable)) + CDMdbHandler$connectionHandler$executeSql(paste0( + "DROP TABLE ", + resultsDatabaseSchema, + ".", + stratifiedCodeCountsTable + )) }) - domain <- tibble::tribble( - ~domain_id, ~table_name, ~concept_id_field, ~date_field, ~maps_to_concept_id_field, - "Condition", "condition_occurrence", "condition_concept_id", "condition_start_date", "condition_source_concept_id" + domain <- tibble::tribble( + ~domain_id , ~table_name , ~concept_id_field , ~date_field , ~maps_to_concept_id_field , + "Condition" , "condition_occurrence" , "condition_concept_id" , "condition_start_date" , "condition_source_concept_id" ) # codeAtomicCountsWithDuplicatedCounts suppressWarnings( - createStratifiedCodeCountsTable(CDMdbHandler, domains = domain, stratifiedCodeCountsTable = stratifiedCodeCountsTable) + createStratifiedCodeCountsTable( + CDMdbHandler, + domains = domain, + stratifiedCodeCountsTable = stratifiedCodeCountsTable + ) ) - stratifiedCodeCounts <- CDMdbHandler$connectionHandler$tbl(I(paste0(resultsDatabaseSchema, ".", stratifiedCodeCountsTable))) + stratifiedCodeCounts <- 
CDMdbHandler$connectionHandler$tbl(I(paste0( + resultsDatabaseSchema, + ".", + stratifiedCodeCountsTable + ))) + + # check that the table was created + nrows <- stratifiedCodeCounts |> + dplyr::count() |> + dplyr::pull(n) + + nrows |> expect_gt(0) - # check that the table was created + # check that the table was created with correct columns stratifiedCodeCounts |> - dplyr::count() |> - dplyr::pull(n) |> - expect_gt(0) - + head() |> + dplyr::collect() |> + colnames() |> + expect_equal(c( + "concept_id", + "maps_to_concept_id", + "visit_group_concept_id", + "calendar_year", + "gender_concept_id", + "age_decile", + "record_counts" + )) + + stratifiedCodeCounts |> + dplyr::filter(visit_group_concept_id != 0) |> + dplyr::count() |> + dplyr::pull(n) |> + expect_equal(0) +}) + +test_that("createStratifiedCodeCountsTable works with visit_source_group_concept_ids", { + # only works in a full CDM database + skip_if(testingDatabase != "AtlasDevelopment-5k") + + CDMdbHandler <- HadesExtras_createCDMdbHandlerFromList( + test_cohortTableHandlerConfig, + loadConnectionChecksLevel = "basicChecks" + ) + withr::defer({ + CDMdbHandler <- NULL + gc() + }) + + stratifiedCodeCountsTable <- "stratified_code_counts_test0" + resultsDatabaseSchema <- CDMdbHandler$resultsDatabaseSchema + + withr::defer({ + CDMdbHandler$connectionHandler$executeSql(paste0( + "DROP TABLE ", + resultsDatabaseSchema, + ".", + stratifiedCodeCountsTable + )) + }) + + domain <- tibble::tribble( + ~domain_id , ~table_name , ~concept_id_field , ~date_field , ~maps_to_concept_id_field , + "Condition" , "condition_occurrence" , "condition_concept_id" , "condition_start_date" , "condition_source_concept_id" + ) + + visitSourceGroupConceptIds = c( + # longitudinal + 2002330246, # INPAT + 2002330247, # OPER_IN + 2002330248, # OPER_OUT + 2002330249, # OUTPAT + 2002330250, # PRIM_OUT + 2002330102, # REIM + 2002330104, # DEATH + 2002330101, # PURCH + 2002330103, # CANC + # registers + 2002330245, # KANTA + 2002330106, # 
BIOBANK + 2002330186, # KIDNEY + 2002330119, # VISION + 2002330105, # BIRTH_MOTHER + # Drugs + 2002330251, # PRESCRIPTION + 2002330252, # DELIVERY + 2002330253, # PRESCRIPTION_DELIVERY + 2002330254, # DELIVERY_KELA + 2002330255 # PRESCRIPTION_DELIVERY_KELA + ) + + # codeAtomicCountsWithDuplicatedCounts + suppressWarnings( + createStratifiedCodeCountsTable( + CDMdbHandler, + domains = domain, + stratifiedCodeCountsTable = stratifiedCodeCountsTable, + visitSourceGroupConceptIds = visitSourceGroupConceptIds + ) + ) + + stratifiedCodeCounts <- CDMdbHandler$connectionHandler$tbl(I(paste0( + resultsDatabaseSchema, + ".", + stratifiedCodeCountsTable + ))) + + # check that the table was created + nrows <- stratifiedCodeCounts |> + dplyr::count() |> + dplyr::pull(n) + + nrows |> expect_gt(0) + # check that the table was created with correct columns stratifiedCodeCounts |> head() |> @@ -41,11 +158,133 @@ test_that("createStratifiedCodeCountsTable works with duplicated counts", { expect_equal(c( "concept_id", "maps_to_concept_id", + "visit_group_concept_id", "calendar_year", "gender_concept_id", "age_decile", "record_counts" )) + + + stratifiedCodeCounts |> + dplyr::filter(visit_group_concept_id == 0) |> + dplyr::count() |> + dplyr::pull(n) |> + expect_equal(0) + + ## All the visit_group_concept_id are in the provided visitSourceGroupConceptIds + stratifiedCodeCounts |> + dplyr::distinct(visit_group_concept_id) |> + dplyr::pull(visit_group_concept_id) |> + (\(x) expect_true(all(x %in% visitSourceGroupConceptIds)))() +}) + + +test_that("createStratifiedCodeCountsTable works with visit_source_group_concept_ids if one missing takes childern", { + # only works in a full CDM database + skip_if(testingDatabase != "AtlasDevelopment-5k") + + CDMdbHandler <- HadesExtras_createCDMdbHandlerFromList( + test_cohortTableHandlerConfig, + loadConnectionChecksLevel = "basicChecks" + ) + withr::defer({ + CDMdbHandler <- NULL + gc() + }) + + stratifiedCodeCountsTable <- 
"stratified_code_counts_test0" + resultsDatabaseSchema <- CDMdbHandler$resultsDatabaseSchema + + withr::defer({ + CDMdbHandler$connectionHandler$executeSql(paste0( + "DROP TABLE ", + resultsDatabaseSchema, + ".", + stratifiedCodeCountsTable + )) + }) + + domain <- tibble::tribble( + ~domain_id , ~table_name , ~concept_id_field , ~date_field , ~maps_to_concept_id_field , + "Condition" , "condition_occurrence" , "condition_concept_id" , "condition_start_date" , "condition_source_concept_id" + ) + + visitSourceGroupConceptIds = c( + # longitudinal + 2002330246, # INPAT + 2002330247, # OPER_IN + 2002330248, # OPER_OUT + #2002330249, # OUTPAT + 2002330250, # PRIM_OUT + 2002330102, # REIM + 2002330104, # DEATH + 2002330101, # PURCH + 2002330103, # CANC + # registers + 2002330245, # KANTA + 2002330106, # BIOBANK + 2002330186, # KIDNEY + 2002330119, # VISION + 2002330105, # BIRTH_MOTHER + # Drugs + 2002330251, # PRESCRIPTION + 2002330252, # DELIVERY + 2002330253, # PRESCRIPTION_DELIVERY + 2002330254, # DELIVERY_KELA + 2002330255 # PRESCRIPTION_DELIVERY_KELA + ) + + # codeAtomicCountsWithDuplicatedCounts + suppressWarnings( + createStratifiedCodeCountsTable( + CDMdbHandler, + domains = domain, + stratifiedCodeCountsTable = stratifiedCodeCountsTable, + visitSourceGroupConceptIds = visitSourceGroupConceptIds + ) + ) + + stratifiedCodeCounts <- CDMdbHandler$connectionHandler$tbl(I(paste0( + resultsDatabaseSchema, + ".", + stratifiedCodeCountsTable + ))) + + # check that the table was created + nrows <- stratifiedCodeCounts |> + dplyr::count() |> + dplyr::pull(n) + + nrows |> expect_gt(0) + + # check that the table was created with correct columns + stratifiedCodeCounts |> + head() |> + dplyr::collect() |> + colnames() |> + expect_equal(c( + "concept_id", + "maps_to_concept_id", + "visit_group_concept_id", + "calendar_year", + "gender_concept_id", + "age_decile", + "record_counts" + )) + + + stratifiedCodeCounts |> + dplyr::filter(visit_group_concept_id == 0) |> + 
dplyr::count() |> + dplyr::pull(n) |> + expect_equal(0) + + ## At least one visit_group_concept_id is outside the provided visitSourceGroupConceptIds + stratifiedCodeCounts |> + dplyr::distinct(visit_group_concept_id) |> + dplyr::pull(visit_group_concept_id) |> + (\(x) expect_false(all(x %in% visitSourceGroupConceptIds)))() }) # test_that("createObservationCountsTable works", { @@ -76,12 +315,14 @@ test_that("createStratifiedCodeCountsTable works with duplicated counts", { # expect_gt(0) # }) - test_that("createCodeCountsTables works", { # only works in a full CDM database - skip_if(testingDatabase == "OnlyCounts-FinnGen") + skip_if(testingDatabase != "AtlasDevelopment-5k") - CDMdbHandler <- HadesExtras_createCDMdbHandlerFromList(test_cohortTableHandlerConfig, loadConnectionChecksLevel = "basicChecks") + CDMdbHandler <- HadesExtras_createCDMdbHandlerFromList( + test_cohortTableHandlerConfig, + loadConnectionChecksLevel = "basicChecks" + ) withr::defer({ CDMdbHandler <- NULL gc() @@ -90,8 +331,18 @@ test_that("createCodeCountsTables works", { codeCountsTable <- "code_counts_test0" stratifiedCodeCountsTable <- paste0("stratified_", codeCountsTable) withr::defer({ - CDMdbHandler$connectionHandler$executeSql(paste0("DROP TABLE ", resultsDatabaseSchema, ".", codeCountsTable)) - CDMdbHandler$connectionHandler$executeSql(paste0("DROP TABLE ", resultsDatabaseSchema, ".", stratifiedCodeCountsTable)) + CDMdbHandler$connectionHandler$executeSql(paste0( + "DROP TABLE ", + resultsDatabaseSchema, + ".", + codeCountsTable + )) + CDMdbHandler$connectionHandler$executeSql(paste0( + "DROP TABLE ", + resultsDatabaseSchema, + ".", + stratifiedCodeCountsTable + )) }) createCodeCountsTables(CDMdbHandler, codeCountsTable = codeCountsTable) @@ -99,7 +350,140 @@ test_that("createCodeCountsTables works", { # - Check if the table was created resultsDatabaseSchema <- CDMdbHandler$resultsDatabaseSchema cdmDatabaseSchema <- CDMdbHandler$cdmDatabaseSchema - code_counts <-
CDMdbHandler$connectionHandler$tbl(I(paste0(resultsDatabaseSchema, ".", codeCountsTable))) + code_counts <- CDMdbHandler$connectionHandler$tbl(I(paste0( + resultsDatabaseSchema, + ".", + codeCountsTable + ))) + + # check that the table was created with correct columns + code_counts |> + dplyr::count() |> + dplyr::pull(n) |> + expect_gt(0) + code_counts |> + head() |> + dplyr::collect() |> + colnames() |> + expect_equal(c( + "concept_id", + "record_counts", + "descendant_record_counts", + "number_of_descendants" + )) + + # check that descendant_record_counts is greater than or equal to record_counts + code_counts |> + dplyr::filter(descendant_record_counts < record_counts) |> + dplyr::count() |> + dplyr::pull(n) |> + expect_equal(0) + + # check that concept_id is unique + code_counts |> + dplyr::distinct(concept_id) |> + dplyr::count() |> + dplyr::pull(n) |> + expect_equal(code_counts |> dplyr::count() |> dplyr::pull(n)) + + # check that number_of_descendants is greater than or equal to 1 + code_counts |> + dplyr::filter(number_of_descendants < 1) |> + dplyr::count() |> + dplyr::pull(n) |> + expect_equal(0) + + # check that all with record_counts = descendant_record_counts have number_of_descendants = 1 + code_counts |> + dplyr::filter(record_counts == descendant_record_counts) |> + dplyr::filter(number_of_descendants != 1) |> + dplyr::count() |> + dplyr::pull(n) |> + expect_equal(0) + + # check that all with number_of_descendants > 1 have record_counts <= descendant_record_counts + code_counts |> + dplyr::filter(number_of_descendants > 1) |> + dplyr::filter(record_counts > descendant_record_counts) |> + dplyr::count() |> + dplyr::pull(n) |> + expect_equal(0) +}) + + + + +test_that("createCodeCountsTables works stratified by visit_group_concept_id", { + # only works in a full CDM database + skip_if(testingDatabase != "AtlasDevelopment-5k") + + CDMdbHandler <- HadesExtras_createCDMdbHandlerFromList( + test_cohortTableHandlerConfig, + loadConnectionChecksLevel =
"basicChecks" + ) + withr::defer({ + CDMdbHandler <- NULL + gc() + }) + + codeCountsTable <- "code_counts_test0" + stratifiedCodeCountsTable <- paste0("stratified_", codeCountsTable) + withr::defer({ + CDMdbHandler$connectionHandler$executeSql(paste0( + "DROP TABLE ", + resultsDatabaseSchema, + ".", + codeCountsTable + )) + CDMdbHandler$connectionHandler$executeSql(paste0( + "DROP TABLE ", + resultsDatabaseSchema, + ".", + stratifiedCodeCountsTable + )) + }) + + + visitSourceGroupConceptIds = c( + # longitudinal + 2002330246, # INPAT + 2002330247, # OPER_IN + 2002330248, # OPER_OUT + #2002330249, # OUTPAT + 2002330250, # PRIM_OUT + 2002330102, # REIM + 2002330104, # DEATH + 2002330101, # PURCH + 2002330103, # CANC + # registers + 2002330245, # KANTA + 2002330106, # BIOBANK + 2002330186, # KIDNEY + 2002330119, # VISION + 2002330105, # BIRTH_MOTHER + # Drugs + 2002330251, # PRESCRIPTION + 2002330252, # DELIVERY + 2002330253, # PRESCRIPTION_DELIVERY + 2002330254, # DELIVERY_KELA + 2002330255 # PRESCRIPTION_DELIVERY_KELA + ) + + + createCodeCountsTables( + CDMdbHandler, + codeCountsTable = codeCountsTable, + visitSourceGroupConceptIds = visitSourceGroupConceptIds + ) + + # - Check if the table was created + resultsDatabaseSchema <- CDMdbHandler$resultsDatabaseSchema + cdmDatabaseSchema <- CDMdbHandler$cdmDatabaseSchema + code_counts <- CDMdbHandler$connectionHandler$tbl(I(paste0( + resultsDatabaseSchema, + ".", + codeCountsTable + ))) # check that the table was created with correct columns code_counts |> diff --git a/tests/testthat/test-getCodeCounts.R b/tests/testthat/test-getCodeCounts.R index 535117b..e1e953e 100644 --- a/tests/testthat/test-getCodeCounts.R +++ b/tests/testthat/test-getCodeCounts.R @@ -63,11 +63,11 @@ test_that("getCodeCounts works", { # Check column names stratified_code_counts |> colnames() |> - expect_equal(c("concept_id", "calendar_year", "gender_concept_id", "age_decile", "node_record_counts", "node_descendant_record_counts")) + 
expect_equal(c("concept_id", "visit_group_concept_id", "calendar_year", "gender_concept_id", "age_decile", "node_record_counts", "node_descendant_record_counts")) # columns not empty stratified_code_counts |> - dplyr::filter(is.na(concept_id) | is.na(calendar_year) | is.na(gender_concept_id) | is.na(age_decile) | is.na(node_record_counts) | is.na(node_descendant_record_counts)) |> + dplyr::filter(is.na(concept_id) | is.na(visit_group_concept_id) | is.na(calendar_year) | is.na(gender_concept_id) | is.na(age_decile) | is.na(node_record_counts) | is.na(node_descendant_record_counts)) |> nrow() |> expect_equal(0) diff --git a/tests/testthat/test-getVisitTypeNames.R b/tests/testthat/test-getVisitTypeNames.R new file mode 100644 index 0000000..71d48c2 --- /dev/null +++ b/tests/testthat/test-getVisitTypeNames.R @@ -0,0 +1,63 @@ +test_that("getVisitTypeNames works", { + CDMdbHandler <- HadesExtras_createCDMdbHandlerFromList( + test_cohortTableHandlerConfig, + loadConnectionChecksLevel = "basicChecks" + ) + withr::defer({ + CDMdbHandler <- NULL + gc() + }) + + suppressWarnings( + visitTypeNames <- getVisitTypeNames(CDMdbHandler) + ) + + # Check that the function returns a tibble + expect_s3_class(visitTypeNames, "tbl_df") + + # Check column names + visitTypeNames |> + colnames() |> + expect_equal(c("visitGroupConceptId", "conceptCode", "conceptName")) + + # Check that columns are not empty + visitTypeNames |> + dplyr::filter(is.na(visitGroupConceptId) | is.na(conceptCode) | is.na(conceptName)) |> + nrow() |> + expect_equal(0) + + # Check that visitGroupConceptId is not 0 + visitTypeNames |> + dplyr::filter(visitGroupConceptId == 0) |> + nrow() |> + expect_equal(0) + + # Check that visitGroupConceptId values are unique + visitTypeNames |> + dplyr::distinct(visitGroupConceptId) |> + nrow() |> + expect_equal(nrow(visitTypeNames)) +}) + +test_that("getVisitTypeNames_memoise works", { + CDMdbHandler <- HadesExtras_createCDMdbHandlerFromList( + test_cohortTableHandlerConfig, + 
loadConnectionChecksLevel = "basicChecks" + ) + withr::defer({ + CDMdbHandler <- NULL + gc() + }) + + suppressWarnings( + visitTypeNames1 <- getVisitTypeNames_memoise(CDMdbHandler) + ) + + suppressWarnings( + visitTypeNames2 <- getVisitTypeNames_memoise(CDMdbHandler) + ) + + # Check that both calls return the same result + expect_equal(visitTypeNames1, visitTypeNames2) +}) + diff --git a/vignettes/use_cases.Rmd b/vignettes/use_cases.Rmd index 826b66e..abab9fe 100644 --- a/vignettes/use_cases.Rmd +++ b/vignettes/use_cases.Rmd @@ -18,11 +18,11 @@ knitr::opts_chunk$set( library(ROMOPAPI) test_databasesConfig <- HadesExtras_readAndParseYaml( - pathToYalmFile = system.file("testdata", "config", "onlyCounts_databasesConfig.yml", package = "ROMOPAPI"), + pathToYalmFile = system.file("testdata", "config", "databasesConfig.yml", package = "ROMOPAPI"), pathToFinnGenCountsSqlite = helper_FinnGen_getDatabaseFileCounts() ) -cohortTableHandlerConfig <- test_databasesConfig[[1]]$cohortTableHandler +cohortTableHandlerConfig <- test_databasesConfig[["FC"]]$cohortTableHandler # Create CDMdbHandler CDMdbHandler <- HadesExtras_createCDMdbHandlerFromList(cohortTableHandlerConfig, loadConnectionChecksLevel = "basicChecks")