-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdriver_anon.R
More file actions
121 lines (107 loc) · 4.45 KB
/
driver_anon.R
File metadata and controls
121 lines (107 loc) · 4.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# Top-level code for execution of data request
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(rlang))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(readr))
suppressPackageStartupMessages(library(stringr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(purrr))
# Need to do this for assignInNamespace to work
suppressPackageStartupMessages(library(dbplyr))
# Required for execution using Rscript
suppressPackageStartupMessages(library(methods))
#' Set up the execution environment
#'
#' The .load() function sources the R files needed to execute the query
#' and sets up the execution environment. Files are sourced in this order:
#'
#' 1. `code/config.R` and `code/req_info.R` under `here`
#' 2. the file named by `config('site_info')`
#' 3. `setup.R` and `codesets.R` in the code directory
#'    (`config('subdirs')$code_dir` under `here`)
#' 4. every file in the code directory whose name matches `util_*.R`, then
#'    `cohort_*.R`, then `analyze_*.R`
#' 5. `cohorts.R` in the code directory
#'
#' After sourcing, `.env_setup()` is called, and the `retain_intermediates`
#' and `results_schema` config items are filled in from their `default_*`
#' counterparts if they are `NA`.
#'
#' This function is usually run automatically when the `run.R` file is sourced
#' to execute the request. It may also be executed manually during an
#' interactive session to re-source changed code or to re-establish a connection
#' to the database.
#'
#' **N.B.** You will almost never have to edit this function.
#'
#' @param here The name of the top-level directory for the request. The default
#'   is `config('base_dir')` if the config function has been set up, or the
#'   global variable `base_dir` if not.
#'
#' @return The value of `here`.
#' @md
.load <- function(here = if (exists('config') &&
                             typeof(get('config')) == 'closure') {
                           config('base_dir')
                         } else {
                           base_dir
                         }) {
  # config.R is expected to define config(); the paths to these first two
  # files are fixed because config('subdirs') is not available before then.
  source(file.path(here, 'code', 'config.R'))
  source(file.path(here, 'code', 'req_info.R'))
  source(config('site_info'))

  # Resolve the code directory on every use rather than caching it, so a
  # sourced file that adjusts config('subdirs') is still honored.
  code_path <- function(...) {
    file.path(here, config('subdirs')$code_dir, ...)
  }

  source(code_path('setup.R'))
  source(code_path('codesets.R'))

  # Patterns are anchored so that only `<prefix>_*.R` files are sourced;
  # editor backups (`*.R~`), `*.Rmd`, `*.RData` and the like are skipped.
  for (prefix in c('util', 'cohort', 'analyze')) {
    pattern <- paste0('^', prefix, '_.+\\.R$')
    for (fn in list.files(code_path(), pattern, full.names = TRUE)) {
      source(fn)
    }
  }

  source(code_path('cohorts.R'))

  .env_setup()

  # Fall back to the request-wide defaults for settings left as NA
  for (def in c('retain_intermediates', 'results_schema')) {
    if (is.na(config(def))) {
      config(def, config(paste0('default_', def)))
    }
  }

  here
}
#' Execute the request
#'
#' Runs the steps of the request, assuming the environment has already been
#' prepared (normally by [.load()]).
#'
#' As written, the request reports the framework version, obtains a set of
#' table names from `pull_site_tables()`, passes those names together with
#' `results_tbl('dc_output')` to `attach_anon_id()`, and hands the result to
#' `output_list_to_db_collect()` with `append = FALSE`. Progress is reported
#' to the user through [message()] along the way.
#'
#' The function is kept separate from [.load()] so that it can be invoked
#' directly during development and debugging.
#'
#' @param base_dir The name of the top-level directory for the request. The
#'   default is `config('base_dir')`, which should always be valid after
#'   execution of [.load()]. The current body does not reference this
#'   argument.
#'
#' @return Invisibly, a list with elements `pp_tbl_names` (the value returned
#'   by `pull_site_tables()`) and `tbls_anon` (the value returned by
#'   `attach_anon_id()`). The value is not used by the framework itself.
#' @md
.run <- function(base_dir = config('base_dir')) {
  message('Starting execution with framework version ',
          config('framework_version'))
  message('Generate and add masked site identifiers to all tables with "site" column')

  site_tbl_names <- pull_site_tables()
  # results_tbl() stays inline so it is evaluated exactly when
  # attach_anon_id() first touches the argument
  anon_tbls <- attach_anon_id(all_sites_tbl = results_tbl('dc_output'),
                              tbls_to_anon = site_tbl_names)
  output_list_to_db_collect(anon_tbls, append = FALSE)

  message('Done.')

  # Assemble the result by element assignment so that a NULL value leaves
  # the corresponding element absent
  rslt <- list()
  rslt[['pp_tbl_names']] <- site_tbl_names
  rslt[['tbls_anon']] <- anon_tbls
  invisible(rslt)
}
#' Set up and execute a data request
#'
#' Performs a complete "production" run of the data request: the environment
#' is prepared with [.load()], the request is executed with [.run()], and
#' `.env_cleanup()` is called when this function exits, whether [.run()]
#' completes or fails. The cleanup is registered only after [.load()] has
#' returned.
#'
#' Typically, the `run.R` file calls run_request() when in a production mode.
#'
#' @param base_dir Path to the top of the data request files. This is
#'   typically specified in `run.R`.
#'
#' @return The result of [.run()].
#' @md
run_request <- function(base_dir) {
  # .load() returns the directory it used; pass that on to .run()
  request_root <- .load(base_dir)
  on.exit(.env_cleanup(), add = TRUE)
  .run(base_dir = request_root)
}