---
title: "Getting Started with omophub"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Getting Started with omophub}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)
```

## Introduction

The omophub package provides an R interface to the OMOPHub API for accessing OHDSI ATHENA standardized medical vocabularies. This vignette demonstrates basic usage patterns.

## Installation

Install from GitHub:

```{r install}
# install.packages("devtools")
devtools::install_github("omophub/omophub-R")
```

## Authentication

The package requires an API key from [OMOPHub](https://dashboard.omophub.com).

Set your API key as an environment variable:

```{r auth-env}
Sys.setenv(OMOPHUB_API_KEY = "your_api_key_here")
```

Or add it to your `.Renviron` file for persistence:

```
OMOPHUB_API_KEY=your_api_key_here
```

## Creating a Client

```{r client}
library(omophub)

# Create client (reads API key from environment)
client <- OMOPHubClient$new()

# Or provide API key explicitly
client <- OMOPHubClient$new(api_key = "your_api_key")

# With additional options
client <- OMOPHubClient$new(
  api_key = "your_api_key",
  timeout = 30,
  max_retries = 3,
  vocab_version = "2025.1"
)
```

## Getting Concepts

Retrieve a concept by its OMOP concept ID:

```{r get-concept}
concept <- client$concepts$get(201826)
print(concept$concept_name)
# [1] "Type 2 diabetes mellitus"
```

Get a concept by vocabulary-specific code:

```{r get-by-code}
concept <- client$concepts$get_by_code("SNOMED", "44054006")
print(concept$concept_name)
# [1] "Type 2 diabetes mellitus"
```

## Batch Operations

Retrieve multiple concepts in a single request:

```{r batch}
result <- client$concepts$batch(c(201826, 4329847, 1112807))
for (concept in result$concepts) {
  cat(sprintf("%s: %s\n", concept$concept_id, concept$concept_name))
}
```

## Searching Concepts

Basic search:

```{r search-basic}
results <- client$search$basic("diabetes mellitus", page_size = 10)
for (concept in results$data) {
  cat(sprintf("%s: %s\n", concept$concept_id, concept$concept_name))
}
```

Search with filters:

```{r search-filters}
results <- client$search$basic(
  "heart attack",
  vocabulary_ids = "SNOMED",
  domain_ids = "Condition",
  page_size = 20
)
```

## Semantic Search

Search using natural language queries powered by neural embeddings:

```{r semantic-search}
# Natural language search - understands clinical intent
results <- client$search$semantic("high blood sugar levels")
for (r in results$data$results) {
  cat(sprintf("%s (similarity: %.2f)\n", r$concept_name, r$similarity_score))
}
```

Filter semantic search results:

```{r semantic-filtered}
results <- client$search$semantic(
  "heart attack",
  vocabulary_ids = "SNOMED",
  domain_ids = "Condition",
  threshold = 0.5
)
```

Fetch all semantic search results with automatic pagination:

```{r semantic-all}
all_results <- client$search$semantic_all(
  "chronic kidney disease",
  page_size = 50,
  max_pages = 5,
  progress = TRUE
)
print(nrow(all_results))
```

## Similarity Search

Find concepts similar to a reference concept:

```{r similar-by-id}
# Find concepts similar to Type 2 diabetes mellitus
similar <- client$search$similar(concept_id = 201826)
for (s in similar$similar_concepts) {
  cat(sprintf("%s (score: %.2f)\n", s$concept_name, s$similarity_score))
}
```

Search by natural language query with different algorithms:

```{r similar-by-query}
# Semantic similarity (neural embeddings)
similar <- client$search$similar(
  query = "high blood pressure",
  algorithm = "semantic"
)

# Lexical similarity (string matching)
similar <- client$search$similar(
  query = "high blood pressure",
  algorithm = "lexical"
)

# Hybrid (combined - default)
similar <- client$search$similar(
  query = "high blood pressure",
  algorithm = "hybrid",
  include_scores = TRUE,
  include_explanations = TRUE
)
```

## Bulk Search

Search for multiple queries in a single API call — much faster than
individual requests when you have many terms to look up.

### Bulk Lexical Search

Execute up to 50 keyword searches at once:

```{r bulk-basic}
results <- client$search$bulk_basic(list(
  list(search_id = "q1", query = "diabetes mellitus"),
  list(search_id = "q2", query = "hypertension"),
  list(search_id = "q3", query = "aspirin")
), defaults = list(vocabulary_ids = list("SNOMED"), page_size = 5))

# Each result is matched by search_id
for (item in results$results) {
  cat(sprintf("%s: %d results\n", item$search_id, length(item$results)))
}
```

### Bulk Semantic Search

Execute up to 25 natural-language searches using neural embeddings:

```{r bulk-semantic}
results <- client$search$bulk_semantic(list(
  list(search_id = "s1", query = "heart failure treatment options"),
  list(search_id = "s2", query = "type 2 diabetes medication")
), defaults = list(threshold = 0.5, page_size = 10))

for (item in results$results) {
  cat(sprintf("%s: %d results\n", item$search_id, item$result_count %||% length(item$results)))
}
```

The `%||%` null-coalescing operator used above is part of base R from version 4.4; on older versions of R, load it from `rlang`.

Defaults apply to all searches; per-search values override them:

```{r bulk-overrides}
# Different domains per query, shared vocabulary filter
results <- client$search$bulk_basic(list(
  list(search_id = "cond", query = "diabetes", domain_ids = list("Condition")),
  list(search_id = "drug", query = "metformin", domain_ids = list("Drug"))
), defaults = list(vocabulary_ids = list("SNOMED", "RxNorm"), page_size = 5))
```

## Autocomplete

Get suggestions for autocomplete:

```{r autocomplete}
suggestions <- client$concepts$suggest("diab", page_size = 5)
for (s in suggestions$suggestions) {
  print(s$suggestion)
}
```

## Pagination

### Manual Pagination

```{r pagination-manual}
# First page
results <- client$search$basic("diabetes", page = 1, page_size = 50)

# Check pagination info
print(results$meta$total_items)
print(results$meta$has_next)

# Get next page if available
if (isTRUE(results$meta$has_next)) {
  page2 <- client$search$basic("diabetes", page = 2, page_size = 50)
}
```

### Automatic Pagination

Fetch all results as a tibble:

```{r pagination-auto}
all_results <- client$search$basic_all(
  "diabetes",
  page_size = 100,
  max_pages = 5,
  progress = TRUE
)

# Results are a tibble
print(nrow(all_results))
print(names(all_results))
```

## Hierarchy Navigation

Get ancestors (parent concepts):

```{r ancestors}
result <- client$hierarchy$ancestors(201826, max_levels = 3)
for (ancestor in result$ancestors) {
  print(ancestor$concept_name)
}
```

Get descendants (child concepts):

```{r descendants}
result <- client$hierarchy$descendants(201826, max_levels = 2)
for (descendant in result$descendants) {
  print(descendant$concept_name)
}
```

## Concept Mappings

Find how a concept maps to other vocabularies:

```{r mappings}
result <- client$mappings$get(201826)
for (mapping in result$mappings) {
  cat(sprintf("%s: %s\n", mapping$target_vocabulary_id, mapping$target_concept_name))
}
```

Map to specific vocabularies:

```{r mappings-filter}
result <- client$mappings$get(
  201826,
  target_vocabulary = "ICD10CM"
)
```

## Error Handling

Use `tryCatch` to handle errors:

```{r error-handling}
tryCatch(
  {
    concept <- client$concepts$get(999999999)
  },
  omophub_not_found = function(e) {
    message("Concept not found: ", e$message)
  },
  omophub_api_error = function(e) {
    message("API error: ", e$message)
  }
)
```

## FHIR-to-OMOP Resolution

The FHIR resolver translates FHIR coded values to OMOP standard concepts in a single call --- handling URI mapping, code lookup, `Maps to` traversal, and CDM target table assignment automatically.

### Single Coding

```{r fhir-resolve}
result <- client$fhir$resolve(
  system = "http://snomed.info/sct",
  code = "44054006",
  resource_type = "Condition"
)
cat(result$resolution$standard_concept$concept_name)
cat(result$resolution$target_table) # "condition_occurrence"
cat(result$resolution$mapping_type) # "direct"
```

### Non-Standard Code (Automatic Maps-to Traversal)

```{r fhir-mapped}
result <- client$fhir$resolve(
  system = "http://hl7.org/fhir/sid/icd-10-cm",
  code = "E11.9"
)
cat(result$resolution$mapping_type) # "mapped"
cat(result$resolution$standard_concept$vocabulary_id) # "SNOMED"
```

### Batch Resolution

```{r fhir-batch}
batch <- client$fhir$resolve_batch(list(
  list(system = "http://snomed.info/sct", code = "44054006"),
  list(system = "http://loinc.org", code = "2339-0")
))
cat(sprintf("Resolved: %d/%d\n", batch$summary$resolved, batch$summary$total))
```

### CodeableConcept with Vocabulary Preference

```{r fhir-codeable}
result <- client$fhir$resolve_codeable_concept(
  coding = list(
    list(system = "http://snomed.info/sct", code = "44054006"),
    list(system = "http://hl7.org/fhir/sid/icd-10-cm", code = "E11.9")
  ),
  resource_type = "Condition"
)

# SNOMED wins over ICD-10-CM per OHDSI preference
cat(result$best_match$resolution$source_concept$vocabulary_id) # "SNOMED"
```

To override the vocabulary preference, mark a coding with `user_selected = TRUE` (mirroring FHIR `Coding.userSelected`); that coding then wins `best_match` regardless of its vocabulary:

```r
client$fhir$resolve_codeable_concept(
  coding = list(
    list(
      system = "http://hl7.org/fhir/sid/icd-10-cm",
      code = "E11.9",
      user_selected = TRUE
    ),
    list(system = "http://snomed.info/sct", code = "44054006")
  ),
  resource_type = "Condition"
)
```

### Tibble Output for Batch Resolution

For `dplyr` / `tidyr` workflows, pass `as_tibble = TRUE` to get a flat tibble with one row per input coding and columns for the source and standard concepts, target CDM table, mapping type, and resolution status.
Composite concepts decomposed via `Maps to value` (the HL7 FHIR-to-OMOP IG Value-as-Concept pattern) also populate `value_as_concept_id` / `value_as_concept_name`. This is the most ergonomic shape for ETL pipelines processing many codes:

```{r fhir-batch-tibble}
library(dplyr)

tbl <- client$fhir$resolve_batch(
  list(
    list(system = "http://hl7.org/fhir/sid/icd-10-cm", code = "E11.9"),
    list(system = "http://hl7.org/fhir/sid/icd-10-cm", code = "I10"),
    list(system = "http://hl7.org/fhir/sid/icd-10-cm", code = "J45.909")
  ),
  as_tibble = TRUE
)

tbl |>
  filter(status == "resolved") |>
  select(source_code, standard_concept_name, target_table)
```

Failed rows are kept in place with `status = "failed"` and the error text in `status_detail` --- so you can filter them out explicitly instead of having them silently dropped. The batch summary (`total` / `resolved` / `failed`) is attached as an attribute:

```{r fhir-batch-summary}
attr(tbl, "summary")
```

The default `as_tibble = FALSE` still returns the list-shaped `list(results, summary)`, so existing code keeps working unchanged.

### Standalone Wrapper Functions

For pipe-friendly workflows, `omophub` also exports standalone wrapper functions that take the client as their first argument.
Both forms are fully supported --- pick whichever reads better for the surrounding code:

```{r fhir-standalone-wrappers}
# Equivalent to client$fhir$resolve()
client |>
  fhir_resolve(
    system = "http://snomed.info/sct",
    code = "44054006",
    resource_type = "Condition"
  )

# Tibble-shaped batch in a pipe
tbl <- client |>
  fhir_resolve_batch(
    codings = list(
      list(system = "http://snomed.info/sct", code = "44054006"),
      list(system = "http://loinc.org", code = "2339-0")
    ),
    as_tibble = TRUE
  )

client |>
  fhir_resolve_codeable_concept(
    coding = list(
      list(system = "http://snomed.info/sct", code = "44054006"),
      list(system = "http://hl7.org/fhir/sid/icd-10-cm", code = "E11.9")
    ),
    resource_type = "Condition"
  )
```

### FHIR Client Interop with `omophub_fhir_url()`

When you need raw FHIR `Parameters` / `Bundle` responses instead of the Concept Resolver envelope, `omophub_fhir_url()` returns the OMOPHub FHIR Terminology Service base URL so you can talk to it directly with `httr2` or `fhircrackr`. It supports FHIR versions `"r4"` (default), `"r4b"`, `"r5"`, and `"r6"`:

```{r fhir-url-helper}
omophub_fhir_url()
#> "https://fhir.omophub.com/fhir/r4"

omophub_fhir_url("r5")
#> "https://fhir.omophub.com/fhir/r5"
```

Example: call `CodeSystem/$lookup` directly with `httr2`:

```{r fhir-httr2}
library(httr2)

resp <- request(omophub_fhir_url()) |>
  req_url_path_append("CodeSystem/$lookup") |>
  req_url_query(
    system = "http://snomed.info/sct",
    code = "44054006"
  ) |>
  req_headers(Authorization = paste("Bearer", Sys.getenv("OMOPHUB_API_KEY"))) |>
  req_perform()

params <- resp_body_json(resp)
# Raw FHIR Parameters resource with the concept display and designations.
```

Use `client$fhir$resolve()` (or `fhir_resolve()`) when you want OMOP-enriched answers (standard concept, CDM target table, mapping quality). Use `omophub_fhir_url()` + `httr2` when you need FHIR-native responses for FHIR-native tooling.

## Further Resources

- [OMOPHub API Documentation](https://docs.omophub.com)
- [Package Documentation](https://omophub.github.io/omophub-R/)
- [GitHub Repository](https://github.com/omophub/omophub-R)