This vignette reproduces the LaminDB Introduction guide. The equivalent {laminr} code is included here; for the related text, see the associated links. This vignette requires the bionty Python package to be available.
lamin_init_temp(name = "laminr-intro", modules = c("bionty"))
NOTE: We have used lamin_init_temp()
to create a temporary instance for this vignette, but in most cases you
should use the regular lamin_init().
# Import the "lamindb" module; it is referred to as `ln` in the rest of the
# vignette
ln <- import_module("lamindb")
# Load the small example dataset as a DataFrame, requesting the variant that
# contains a typo (with_typo = TRUE)
df <- ln$core$datasets$small_dataset1(otype = "DataFrame", with_typo = TRUE)
# Build an artifact from the data frame under the given key and save it
artifact <- ln$Artifact$from_df(df, key = "my_datasets/rnaseq1.parquet")$save()
#> [1] "/tmp/RtmpJ8A8HB/laminr-intro-20250310172937/.lamindb/0QKuoYMA4v42mpaH0000.parquet"
dataset <- artifact$open()
#> ✖ `view_lineage()` is not yet implemented. Please view the lineage in the web interface.
# Keep a copy of the data frame as loaded, before the corrections below;
# it is passed to a curator later in the vignette
df_typo <- df
# Replace the levels of the perturbation column with the intended labels
# (assumes perturbation is a factor with two levels -- TODO confirm)
levels(df$perturbation) <- c("DMSO", "IFNG")
# Explicitly set the perturbation value for row "sample2"
df["sample2", "perturbation"] <- "IFNG"
# Save the corrected data frame under the same key that was used earlier
artifact <- ln$Artifact$from_df(df, key = "my_datasets/rnaseq1.parquet")$save()
#> → creating new artifact version for key='my_datasets/rnaseq1.parquet' (storage: '/tmp/RtmpJ8A8HB/laminr-intro-20250310172937')
bt <- import_module("bionty")
experiment_type <- ln$ULabel(name = "Experiment", is_type = TRUE)$save()
candidate_marker_experiment <- ln$ULabel(
name = "Candidate marker experiment", type = experiment_type
cell_type <- bt$CellType$from_source(name = "effector T cell")$save()
Query & search
# Fetch the transform record whose key is "introduction.Rmd"
transform <- ln$Transform$get(key = "introduction.Rmd")
# Filter artifacts whose key starts with "my_datasets/" and show the result
# via df()
ln$Artifact$filter(key__startswith = "my_datasets/")$df()
# Collect all artifacts matching the transform fetched above
artifacts <- ln$Artifact$filter(transform = transform)$all()
artifacts <- ln$Artifact$filter(
transform__description__icontains = "intro", ulabels = candidate_marker_experiment
ulabels <- ln$ULabel$lookup()
cell_types <- bt$CellType$lookup()
ln$Feature(name = "temperature", dtype = "float")$save()
#> Feature(uid='Y51YJDN3oiJg', name='temperature', dtype='float', array_rank=0, array_size=0, space_id=1, created_by_id=1, run_id=1, created_at=2025-03-10 17:29:58 UTC)
ln$Feature(name = "experiment", dtype = ln$ULabel)$save()
#> Feature(uid='fQ9c1yaruwJu', name='experiment', dtype='cat[ULabel]', array_rank=0, array_size=0, space_id=1, created_by_id=1, run_id=1, created_at=2025-03-10 17:29:58 UTC)
list("temperature" = 21.6, "experiment" = "Candidate marker experiment")
ln$Artifact$features$filter(experiment__contains = "marker experiment")$df()
Key use cases
Understand data lineage
Curate datasets
# Load Python's builtins via reticulate so that built-in types (e.g.
# py_builtins$float) can be passed as feature dtypes
py_builtins <- reticulate::import_builtins()
schema <- ln$Schema(
name = "My DataFrame schema",
features = list(
# NOTE: These have dtype=int in the original guide
ln$Feature(name = "ENSG00000153563", dtype = py_builtins$float)$save(),
ln$Feature(name = "ENSG00000010610", dtype = py_builtins$float)$save(),
ln$Feature(name = "ENSG00000170458", dtype = py_builtins$float)$save(),
ln$Feature(name = "perturbation", dtype = ln$ULabel)$save()
# Create a DataFrameCurator for the corrected data frame and the schema
curator <- ln$curators$DataFrameCurator(df, schema)
# Save an artifact through the curator under a new key
artifact <- curator$save_artifact(key = "my_curated_dataset.parquet")
# Retrieve the artifact matched by a ULabel named "IFNG"
ln$Artifact$get(ulabels__name = "IFNG")
#> Artifact(uid='0QKuoYMA4v42mpaH0001', is_latest=True, key='my_datasets/rnaseq1.parquet', suffix='.parquet', kind='dataset', otype='DataFrame', size=6120, hash='O69yLgP32m9XBvvw_7WWxg', n_observations=3, space_id=1, storage_id=1, run_id=1, schema_id=1, created_by_id=1, created_at=2025-03-10 17:29:56 UTC)
curator <- ln$curators$DataFrameCurator(df_typo, schema)
error = function(err) {
Manage biological registries
cell_types <- bt$CellType$public()
var_schema <- ln$Schema(
name = "my_var_schema",
itype = bt$Gene$ensembl_gene_id,
dtype = py_builtins$float
obs_schema <- ln$Schema(
name = "my_obs_schema",
features = list(
ln$Feature(name = "perturbation", dtype = ln$ULabel)$save()
#> → returning existing Feature record with same name: 'perturbation'
anndata_schema <- ln$Schema(
name = "my_anndata_schema",
otype = "AnnData",
components = list("obs" = obs_schema, "var" = var_schema)
adata <- AnnData(
df[c("ENSG00000153563", "ENSG00000010610", "ENSG00000170458")],
obs = df[, "perturbation", drop = FALSE]
curator <- ln$curators$AnnDataCurator(adata, anndata_schema)
# Build a lookup object over Gene records filtered to organism name "human"
genes <- bt$Gene$filter(organism__name = "human")$lookup()
# Collect all feature sets matched by the cd8a entry of the gene lookup
feature_sets <- ln$FeatureSet$filter(genes = genes$cd8a)$all()
# Filter artifacts to those whose feature sets are among the ones found above
# and show the result via df()
ln$Artifact$filter(feature_sets__in = feature_sets)$df()
neuron <- bt$CellType$from_source(name = "neuron")$save()
#> ✓ created 1 CellType record from Bionty matching name: 'neuron'
#> ✓ created 3 CellType records from Bionty matching ontology_id: 'CL:0002319', 'CL:0000404', 'CL:0000393'
new_cell_state <- bt$CellType(
name = "my neuron cell state", description = "explains X"
new_cell_state$view_parents(distance = 2)
Scale learning
df2 <- ln$core$datasets$small_dataset2(otype = "DataFrame")
adata <- AnnData(
df2[c("ENSG00000153563", "ENSG00000010610", "ENSG00000004468")],
obs = df2[, "perturbation", drop = FALSE]
curator <- ln$curators$AnnDataCurator(adata, anndata_schema)
collection <- ln$Collection(
list(artifact, artifact2),
key = "my-RNA-seq-collection"
