Codelist Generation
The omopy.codelist module provides tools for searching OMOP vocabularies,
building codelists, and analysing concept usage. It is the Python equivalent
of the R CodelistGenerator package.
Searching for Concepts
from omopy.codelist import get_candidate_codes
# Search by keyword
codelist = get_candidate_codes(
cdm,
keywords=["sinusitis"],
domains=["Condition"],
standard_concept="S",
)
print(codelist)
# Codelist({"sinusitis": [40481087, 257012, 4283893, ...]})
Search Options
codelist = get_candidate_codes(
cdm,
keywords=["diabetes", "mellitus"],
domains=["Condition"], # restrict to specific domains
standard_concept="S", # standard concepts only
vocabulary_id=["SNOMED"], # restrict to specific vocabularies
concept_class_id=["Disorder"], # restrict to concept classes
exclude=["insipidus"], # exclude concepts matching these
)
Concept Mappings
from omopy.codelist import get_mappings
# Find standard concepts mapped from source codes
mapped = get_mappings(
cdm,
codelist,
relationship_id="Maps to",
)
Hierarchy Traversal
from omopy.codelist import get_descendants, get_ancestors
# Expand codelist to include descendant concepts
expanded = get_descendants(cdm, codelist)
# Find ancestor concepts
ancestors = get_ancestors(cdm, codelist)
Drug-Specific Functions
from omopy.codelist import get_drug_ingredient_codes, get_atc_codes
# Find drug ingredients by keyword
ingredients = get_drug_ingredient_codes(cdm, ingredient="ibuprofen")
# Find ATC codes
atc = get_atc_codes(cdm, atc_name="anti-inflammatory", level="3")
Codelist Operations
from omopy.codelist import union_codelists, intersect_codelists, compare_codelists
# Combine codelists
combined = union_codelists(codelist_a, codelist_b)
# Find common concepts
common = intersect_codelists(codelist_a, codelist_b)
# Compare two codelists
comparison = compare_codelists(codelist_a, codelist_b)
Subsetting
from omopy.codelist import subset_by_domain, subset_by_vocabulary, subset_to_codes_in_use
# Keep only Condition concepts
conditions_only = subset_by_domain(codelist, cdm, domain_id="Condition")
# Keep only SNOMED concepts
snomed_only = subset_by_vocabulary(codelist, cdm, vocabulary_id="SNOMED")
# Keep only concepts that appear in the data
in_use = subset_to_codes_in_use(codelist, cdm)
Stratification
from omopy.codelist import stratify_by_domain, stratify_by_vocabulary, stratify_by_concept_class
# Split by domain
by_domain = stratify_by_domain(codelist, cdm)
# {"Condition": Codelist(...), "Drug": Codelist(...), ...}
# Split by vocabulary
by_vocab = stratify_by_vocabulary(codelist, cdm)
# Split by concept class
by_class = stratify_by_concept_class(codelist, cdm)
Diagnostics
from omopy.codelist import summarise_code_use, summarise_orphan_codes
# How often do codelist concepts appear in the data?
usage = summarise_code_use(codelist, cdm)
print(usage) # Polars DataFrame with counts per concept per domain
# Find orphan codes: concepts in the hierarchy that are present in the data but not in the codelist
orphans = summarise_orphan_codes(codelist, cdm)
print(orphans)