Title: | Characterise Tables of an OMOP Common Data Model Instance |
---|---|
Description: | Summarises key information in data mapped to the Observational Medical Outcomes Partnership (OMOP) common data model. Assess suitability to perform specific epidemiological studies and explore the different domains to obtain feasibility counts and trends. |
Authors: | Marta Alcalde-Herraiz [aut, cre] , Kim Lopez-Guell [aut] , Elin Rowlands [aut] , Cecilia Campanile [aut] , Edward Burn [aut] , Martí Català [aut] |
Maintainer: | Marta Alcalde-Herraiz <[email protected]> |
License: | Apache License (>= 2) |
Version: | 0.1.1 |
Built: | 2024-11-21 12:43:44 UTC |
Source: | https://github.com/ohdsi/omopsketch |
Creates a mock database to test OmopSketch package.
mockOmopSketch( con = NULL, writeSchema = NULL, numberIndividuals = 100, seed = NULL )
mockOmopSketch( con = NULL, writeSchema = NULL, numberIndividuals = 100, seed = NULL )
con |
A DBI connection to create the cdm mock object. By default, the connection would be a 'duckdb' one. |
writeSchema |
Name of a schema of the DBI connection with writing permissions. |
numberIndividuals |
Number of individuals to create in the cdm reference object. |
seed |
An optional integer used to set the seed for random number generation, ensuring reproducibility of the generated data. If provided, this seed allows the function to produce consistent results each time it is run with the same inputs. If 'NULL', the seed is not set, which can lead to different outputs on each run. |
A mock cdm_reference object.
mockOmopSketch(numberIndividuals = 100)
mockOmopSketch(numberIndividuals = 100)
Plot the concept counts of a summariseConceptCounts output.
plotConceptCounts(result, facet = NULL, colour = NULL)
plotConceptCounts(result, facet = NULL, colour = NULL)
result |
A summarised_result object (output of summariseConceptCounts). |
facet |
Columns to facet by. Formula format can be provided. See possible
columns to facet by with: `visOmopResults::tidyColumns()`. |
colour |
Columns to colour by. See possible columns to colour by with:
`visOmopResults::tidyColumns()`. |
A ggplot2 object showing the concept counts.
library(dplyr) cdm <- mockOmopSketch() result <- cdm |> summariseConceptCounts( conceptId = list( "Renal agenesis" = 194152, "Manic mood" = c(4226696, 4304866, 37110496, 40371897) ) ) result |> filter(variable_name == "Number subjects") |> plotConceptCounts(facet = "codelist_name", colour = "standard_concept_name") PatientProfiles::mockDisconnect(cdm)
library(dplyr) cdm <- mockOmopSketch() result <- cdm |> summariseConceptCounts( conceptId = list( "Renal agenesis" = 194152, "Manic mood" = c(4226696, 4304866, 37110496, 40371897) ) ) result |> filter(variable_name == "Number subjects") |> plotConceptCounts(facet = "codelist_name", colour = "standard_concept_name") PatientProfiles::mockDisconnect(cdm)
Create a ggplot2 plot from the output of summariseInObservation().
plotInObservation(result, facet = NULL, colour = NULL)
plotInObservation(result, facet = NULL, colour = NULL)
result |
A summarised_result object (output of summariseInObservation). |
facet |
Columns to facet by. Formula format can be provided. See possible
columns to facet by with: `visOmopResults::tidyColumns()`. |
colour |
Columns to colour by. See possible columns to colour by with:
`visOmopResults::tidyColumns()`. |
A ggplot showing the table counts
library(dplyr) cdm <- mockOmopSketch() result <- summariseInObservation( cdm$observation_period, output = c("person-days","records"), ageGroup = list("<=40" = c(0, 40), ">40" = c(41, Inf)), sex = TRUE ) result |> filter(variable_name == "Number person-days") |> plotInObservation(facet = "sex", colour = "age_group") PatientProfiles::mockDisconnect(cdm)
library(dplyr) cdm <- mockOmopSketch() result <- summariseInObservation( cdm$observation_period, output = c("person-days","records"), ageGroup = list("<=40" = c(0, 40), ">40" = c(41, Inf)), sex = TRUE ) result |> filter(variable_name == "Number person-days") |> plotInObservation(facet = "sex", colour = "age_group") PatientProfiles::mockDisconnect(cdm)
Create a plot from the output of summariseObservationPeriod().
plotObservationPeriod( result, variableName = "number subjects", plotType = "barplot", facet = NULL, colour = NULL )
plotObservationPeriod( result, variableName = "number subjects", plotType = "barplot", facet = NULL, colour = NULL )
result |
A summarised_result object. |
variableName |
The variable to plot. It can be: "number subjects", "records per person", "duration in days" or "days to next observation period". |
plotType |
The plot type, it can be: "barplot", "boxplot" or "densityplot". |
facet |
Columns to facet by. See possible columns to facet by with:
`visOmopResults::tidyColumns()`. |
colour |
Columns to colour by. See possible columns to colour by with:
`visOmopResults::tidyColumns()`. |
A ggplot2 object.
cdm <- mockOmopSketch(numberIndividuals = 100) result <- summariseObservationPeriod(cdm$observation_period) result |> plotObservationPeriod( variableName = "duration in days", plotType = "boxplot" ) PatientProfiles::mockDisconnect(cdm)
cdm <- mockOmopSketch(numberIndividuals = 100) result <- summariseObservationPeriod(cdm$observation_period) result |> plotObservationPeriod( variableName = "duration in days", plotType = "boxplot" ) PatientProfiles::mockDisconnect(cdm)
Create a ggplot of the records' count trend.
plotRecordCount(result, facet = NULL, colour = NULL)
plotRecordCount(result, facet = NULL, colour = NULL)
result |
Output from summariseRecordCount(). |
facet |
Columns to facet by. Formula format can be provided. See possible
columns to facet by with: `visOmopResults::tidyColumns()`. |
colour |
Columns to colour by. See possible columns to colour by with:
`visOmopResults::tidyColumns()`. |
A ggplot showing the table counts
cdm <- mockOmopSketch() summarisedResult <- summariseRecordCount( cdm = cdm, omopTableName = "condition_occurrence", ageGroup = list("<=20" = c(0,20), ">20" = c(21, Inf)), sex = TRUE ) plotRecordCount(summarisedResult, colour = "age_group", facet = sex ~ .) PatientProfiles::mockDisconnect(cdm = cdm)
cdm <- mockOmopSketch() summarisedResult <- summariseRecordCount( cdm = cdm, omopTableName = "condition_occurrence", ageGroup = list("<=20" = c(0,20), ">20" = c(21, Inf)), sex = TRUE ) plotRecordCount(summarisedResult, colour = "age_group", facet = sex ~ .) PatientProfiles::mockDisconnect(cdm = cdm)
Summarise concept use in patient-level data
summariseAllConceptCounts( cdm, omopTableName, countBy = "record", year = FALSE, sex = FALSE, ageGroup = NULL, dateRange = NULL )
summariseAllConceptCounts( cdm, omopTableName, countBy = "record", year = FALSE, sex = FALSE, ageGroup = NULL, dateRange = NULL )
cdm |
A cdm object |
omopTableName |
A character vector of the names of the tables to summarise in the cdm object. |
countBy |
Either "record" for record-level counts or "person" for person-level counts |
year |
TRUE or FALSE. If TRUE code use will be summarised by year. |
sex |
TRUE or FALSE. If TRUE code use will be summarised by sex. |
ageGroup |
A list of ageGroup vectors of length two. Code use will be thus summarised by age groups. |
dateRange |
A list containing the minimum and the maximum dates defining the time range within which the analysis is performed. |
A summarised_result object with results overall and, if specified, by strata.
Summarise an omop table from a cdm object. You will obtain information related to the number of records, number of subjects, whether the records are in observation, number of present domains and number of present concepts.
summariseClinicalRecords( cdm, omopTableName, recordsPerPerson = c("mean", "sd", "median", "q25", "q75", "min", "max"), inObservation = TRUE, standardConcept = TRUE, sourceVocabulary = FALSE, domainId = TRUE, typeConcept = TRUE, sex = FALSE, ageGroup = NULL, dateRange = NULL )
summariseClinicalRecords( cdm, omopTableName, recordsPerPerson = c("mean", "sd", "median", "q25", "q75", "min", "max"), inObservation = TRUE, standardConcept = TRUE, sourceVocabulary = FALSE, domainId = TRUE, typeConcept = TRUE, sex = FALSE, ageGroup = NULL, dateRange = NULL )
cdm |
A cdm_reference object. |
omopTableName |
A character vector of the names of the tables to summarise in the cdm object. |
recordsPerPerson |
Generates summary statistics for the number of records per person. Set to NULL if no summary statistics are required. |
inObservation |
Boolean variable. Whether to include the percentage of records in observation. |
standardConcept |
Boolean variable. Whether to summarise standard concept information. |
sourceVocabulary |
Boolean variable. Whether to summarise source vocabulary information. |
domainId |
Boolean variable. Whether to summarise domain id of standard concept id information. |
typeConcept |
Boolean variable. Whether to summarise type concept id field information. |
sex |
Boolean variable. Whether to stratify by sex (TRUE) or not (FALSE). |
ageGroup |
A list of age groups to stratify results by. |
dateRange |
A list containing the minimum and the maximum dates defining the time range within which the analysis is performed. |
A summarised_result object.
cdm <- mockOmopSketch() summarisedResult <- summariseClinicalRecords( cdm = cdm, omopTableName = "condition_occurrence", recordsPerPerson = c("mean", "sd"), inObservation = TRUE, standardConcept = TRUE, sourceVocabulary = TRUE, domainId = TRUE, typeConcept = TRUE ) summarisedResult PatientProfiles::mockDisconnect(cdm = cdm)
cdm <- mockOmopSketch() summarisedResult <- summariseClinicalRecords( cdm = cdm, omopTableName = "condition_occurrence", recordsPerPerson = c("mean", "sd"), inObservation = TRUE, standardConcept = TRUE, sourceVocabulary = TRUE, domainId = TRUE, typeConcept = TRUE ) summarisedResult PatientProfiles::mockDisconnect(cdm = cdm)
Summarise concept counts in patient-level data. Only concepts recorded during observation period are counted.
summariseConceptCounts( cdm, conceptId, countBy = c("record", "person"), concept = TRUE, interval = "overall", sex = FALSE, ageGroup = NULL, dateRange = NULL )
summariseConceptCounts( cdm, conceptId, countBy = c("record", "person"), concept = TRUE, interval = "overall", sex = FALSE, ageGroup = NULL, dateRange = NULL )
cdm |
A cdm object |
conceptId |
List of concept IDs to summarise. |
countBy |
Either "record" for record-level counts or "person" for person-level counts |
concept |
TRUE or FALSE. If TRUE code use will be summarised by concept. |
interval |
Time interval to stratify by. It can either be "years", "quarters", "months" or "overall". |
sex |
TRUE or FALSE. If TRUE code use will be summarised by sex. |
ageGroup |
A list of ageGroup vectors of length two. Code use will be thus summarised by age groups. |
dateRange |
A list containing the minimum and the maximum dates defining the time range within which the analysis is performed. |
A summarised_result object with results overall and, if specified, by strata.
library(OmopSketch) cdm <- mockOmopSketch() cs <- list(sumatriptan = c(35604883, 35604879, 35604880, 35604884)) results <- summariseConceptCounts(cdm, conceptId = cs) results PatientProfiles::mockDisconnect(cdm)
library(OmopSketch) cdm <- mockOmopSketch() cs <- list(sumatriptan = c(35604883, 35604879, 35604880, 35604884)) results <- summariseConceptCounts(cdm, conceptId = cs) results PatientProfiles::mockDisconnect(cdm)
Summarise the number of people in observation during a specific interval of time.
summariseInObservation( observationPeriod, interval = "overall", output = "records", ageGroup = NULL, sex = FALSE, dateRange = NULL )
summariseInObservation( observationPeriod, interval = "overall", output = "records", ageGroup = NULL, sex = FALSE, dateRange = NULL )
observationPeriod |
An observation_period omop table. It must be part of a cdm_reference object. |
interval |
Time interval to stratify by. It can either be "years", "quarters", "months" or "overall". |
output |
Output format. It can be either the number of records ("records") that are in observation in the specific interval of time, the number of person-days ("person-days"), or both c("records","person-days"). |
ageGroup |
A list of age groups to stratify results by. |
sex |
Boolean variable. Whether to stratify by sex (TRUE) or not (FALSE). |
dateRange |
A list containing the minimum and the maximum dates defining the time range within which the analysis is performed. |
A summarised_result object.
library(dplyr, warn.conflicts = FALSE) cdm <- mockOmopSketch() result <- summariseInObservation( cdm$observation_period, interval = "months", output = c("person-days","records"), ageGroup = list("<=60" = c(0,60), ">60" = c(61, Inf)), sex = TRUE ) result |> glimpse() PatientProfiles::mockDisconnect(cdm)
library(dplyr, warn.conflicts = FALSE) cdm <- mockOmopSketch() result <- summariseInObservation( cdm$observation_period, interval = "months", output = c("person-days","records"), ageGroup = list("<=60" = c(0,60), ">60" = c(61, Inf)), sex = TRUE ) result |> glimpse() PatientProfiles::mockDisconnect(cdm)
Summarise missing data in omop tables
summariseMissingData( cdm, omopTableName, col = NULL, sex = FALSE, year = FALSE, ageGroup = NULL, sample = 1e+06, dateRange = NULL )
summariseMissingData( cdm, omopTableName, col = NULL, sex = FALSE, year = FALSE, ageGroup = NULL, sample = 1e+06, dateRange = NULL )
cdm |
A cdm object |
omopTableName |
A character vector of the names of the tables to summarise in the cdm object. |
col |
A character vector of column names to check for missing values.
If `NULL` (the default), all columns in the specified tables are checked. |
sex |
TRUE or FALSE. If TRUE code use will be summarised by sex. |
year |
TRUE or FALSE. If TRUE code use will be summarised by year. |
ageGroup |
A list of ageGroup vectors of length two. Code use will be thus summarised by age groups. |
sample |
An integer to sample the table to only that number of records. If NULL no sample is done. |
dateRange |
A list containing the minimum and the maximum dates defining the time range within which the analysis is performed. |
A summarised_result object with results overall and, if specified, by strata.
Summarise the observation period table getting some overall statistics in a summarised_result object.
summariseObservationPeriod( observationPeriod, estimates = c("mean", "sd", "min", "q05", "q25", "median", "q75", "q95", "max", "density"), ageGroup = NULL, sex = FALSE, dateRange = NULL )
summariseObservationPeriod( observationPeriod, estimates = c("mean", "sd", "min", "q05", "q25", "median", "q75", "q95", "max", "density"), ageGroup = NULL, sex = FALSE, dateRange = NULL )
observationPeriod |
observation_period omop table. |
estimates |
Estimates to summarise the variables of interest (`records per person`,
`duration in days` and `days to next observation period`). |
ageGroup |
A list of age groups to stratify results by. |
sex |
Boolean variable. Whether to stratify by sex (TRUE) or not (FALSE). |
dateRange |
A list containing the minimum and the maximum dates defining the time range within which the analysis is performed. |
A summarised_result object with the summarised data.
library(dplyr, warn.conflicts = FALSE) cdm <- mockOmopSketch(numberIndividuals = 100) result <- summariseObservationPeriod(cdm$observation_period) result |> glimpse() PatientProfiles::mockDisconnect(cdm)
library(dplyr, warn.conflicts = FALSE) cdm <- mockOmopSketch(numberIndividuals = 100) result <- summariseObservationPeriod(cdm$observation_period) result |> glimpse() PatientProfiles::mockDisconnect(cdm)
Summarise a cdm_reference object creating a snapshot with the metadata of the cdm_reference object.
summariseOmopSnapshot(cdm)
summariseOmopSnapshot(cdm)
cdm |
A cdm_reference object. |
A summarised_result object.
cdm <- mockOmopSketch(numberIndividuals = 10) summariseOmopSnapshot(cdm)
cdm <- mockOmopSketch(numberIndividuals = 10) summariseOmopSnapshot(cdm)
Summarise record counts of an omop_table using a specific time interval. Only records that fall within the observation period are considered.
summariseRecordCount( cdm, omopTableName, interval = "overall", ageGroup = NULL, sex = FALSE, dateRange = NULL )
summariseRecordCount( cdm, omopTableName, interval = "overall", ageGroup = NULL, sex = FALSE, dateRange = NULL )
cdm |
A cdm_reference object. |
omopTableName |
A character vector of omop tables from the cdm. |
interval |
Time interval to stratify by. It can either be "years", "quarters", "months" or "overall". |
ageGroup |
A list of age groups to stratify results by. |
sex |
Whether to stratify by sex (TRUE) or not (FALSE). |
dateRange |
A list containing the minimum and the maximum dates defining the time range within which the analysis is performed. |
A summarised_result object.
library(dplyr, warn.conflicts = FALSE) cdm <- mockOmopSketch() summarisedResult <- summariseRecordCount( cdm = cdm, omopTableName = c("condition_occurrence", "drug_exposure"), interval = "years", ageGroup = list("<=20" = c(0,20), ">20" = c(21, Inf)), sex = TRUE ) summarisedResult |> glimpse() PatientProfiles::mockDisconnect(cdm = cdm)
library(dplyr, warn.conflicts = FALSE) cdm <- mockOmopSketch() summarisedResult <- summariseRecordCount( cdm = cdm, omopTableName = c("condition_occurrence", "drug_exposure"), interval = "years", ageGroup = list("<=20" = c(0,20), ">20" = c(21, Inf)), sex = TRUE ) summarisedResult |> glimpse() PatientProfiles::mockDisconnect(cdm = cdm)
Create a visual table from a summariseAllConceptCounts() result.
tableAllConceptCounts(result, type = "gt")
tableAllConceptCounts(result, type = "gt")
result |
A summarised_result object. |
type |
Type of formatting output table, either "gt" or "flextable". |
A gt or flextable object with the summarised data.
Create a visual table from a summariseClinicalRecords() output.
tableClinicalRecords(result, type = "gt")
tableClinicalRecords(result, type = "gt")
result |
Output from summariseClinicalRecords(). |
type |
Type of formatting output table, either "gt" or "flextable". |
A gt or flextable object with the summarised data.
cdm <- mockOmopSketch() summarisedResult <- summariseClinicalRecords( cdm = cdm, omopTableName = c("condition_occurrence", "drug_exposure"), recordsPerPerson = c("mean", "sd"), inObservation = TRUE, standardConcept = TRUE, sourceVocabulary = TRUE, domainId = TRUE, typeConcept = TRUE ) summarisedResult |> suppress(minCellCount = 5) |> tableClinicalRecords() PatientProfiles::mockDisconnect(cdm)
cdm <- mockOmopSketch() summarisedResult <- summariseClinicalRecords( cdm = cdm, omopTableName = c("condition_occurrence", "drug_exposure"), recordsPerPerson = c("mean", "sd"), inObservation = TRUE, standardConcept = TRUE, sourceVocabulary = TRUE, domainId = TRUE, typeConcept = TRUE ) summarisedResult |> suppress(minCellCount = 5) |> tableClinicalRecords() PatientProfiles::mockDisconnect(cdm)
Create a visual table from a summariseMissingData() result.
tableMissingData(result, type = "gt")
tableMissingData(result, type = "gt")
result |
A summarised_result object. |
type |
Type of formatting output table, either "gt" or "flextable". |
A gt or flextable object with the summarised data.
Create a visual table from a summariseObservationPeriod() result.
tableObservationPeriod(result, type = "gt")
tableObservationPeriod(result, type = "gt")
result |
A summarised_result object. |
type |
Type of formatting output table, either "gt" or "flextable". |
A gt or flextable object with the summarised data.
cdm <- mockOmopSketch(numberIndividuals = 100) result <- summariseObservationPeriod(cdm$observation_period) tableObservationPeriod(result) PatientProfiles::mockDisconnect(cdm)
cdm <- mockOmopSketch(numberIndividuals = 100) result <- summariseObservationPeriod(cdm$observation_period) tableObservationPeriod(result) PatientProfiles::mockDisconnect(cdm)
Create a visual table from a summarise_omop_snapshot result.
tableOmopSnapshot(result, type = "gt")
tableOmopSnapshot(result, type = "gt")
result |
Output from summariseOmopSnapshot(). |
type |
Type of formatting output table, either "gt" or "flextable". |
A gt or flextable object with the summarised data.
cdm <- mockOmopSketch(numberIndividuals = 10) result <- summariseOmopSnapshot(cdm) result |> tableOmopSnapshot() PatientProfiles::mockDisconnect(cdm)
cdm <- mockOmopSketch(numberIndividuals = 10) result <- summariseOmopSnapshot(cdm) result |> tableOmopSnapshot() PatientProfiles::mockDisconnect(cdm)