Introduction
This vignette reproduces the LaminDB Introduction guide. The equivalent {laminr} code is included here; for the related text, see the associated links. This vignette requires the bionty Python package to be available.
Walkthrough
See https://docs.lamin.ai/guide#walkthrough.
lamin_init_temp(name = "laminr-intro", modules = c("bionty"))
NOTE: We have used lamin_init_temp()
to create a temporary instance for this vignette, but in most cases you
should use the regular lamin_init().
Transforms
See https://docs.lamin.ai/guide#transforms.
ln <- import_module("lamindb")
#> → connected lamindb: anonymous/laminr-intro-20250310172937
ln$track()
#> → created Transform('k0fqNQTwFlA70000'), started new Run('uucVQFwb...') at 2025-03-10 17:29:53 UTC
ln$Transform$df()
#> uid key description type source_code hash
#> 1 k0fqNQTwFlA70000 introduction.Rmd introduction.Rmd notebook <NA> <NA>
#> reference reference_type space_id _template_id version is_latest
#> 1 <NA> <NA> 1 <NA> <NA> TRUE
#> created_at created_by_id _aux _branch_code
#> 1 2025-03-10 17:29:53 1 <NA> 1
ln$Run$df()
#> uid name started_at finished_at reference
#> 1 uucVQFwbTRzJtomI5Hvd <NA> 2025-03-10 17:29:53 <NA> <NA>
#> reference_type _is_consecutive _status_code space_id transform_id report_id
#> 1 <NA> <NA> 0 1 1 <NA>
#> _logfile_id environment_id initiated_by_run_id created_at
#> 1 <NA> <NA> <NA> 2025-03-10 17:29:53
#> created_by_id _aux _branch_code
#> 1 1 <NA> 1
Artifacts
See https://docs.lamin.ai/guide#artifacts.
df <- ln$core$datasets$small_dataset1(otype = "DataFrame", with_typo = TRUE)
df
#> ENSG00000153563 ENSG00000010610 ENSG00000170458 perturbation
#> sample1 1 3 5 DMSO
#> sample2 2 4 6 IFNJ
#> sample3 3 5 7 DMSO
#> sample_note cell_type_by_expert cell_type_by_model
#> sample1 was ok B cell B cell
#> sample2 looks naah T cell T cell
#> sample3 pretty! 🤩 T cell T cell
artifact <- ln$Artifact$from_df(df, key = "my_datasets/rnaseq1.parquet")$save()
artifact$describe()
#> Artifact .parquet/DataFrame
#> └── General
#> ├── .uid = '0QKuoYMA4v42mpaH0000'
#> ├── .key = 'my_datasets/rnaseq1.parquet'
#> ├── .size = 6120
#> ├── .hash = 'cD9NSUzJ3YzfmybCQ4Ab9w'
#> ├── .n_observations = 3
#> ├── .path =
#> │ /tmp/RtmpJ8A8HB/laminr-intro-20250310172937/.lamindb/0QKuoYMA4v42mpaH000
#> │ 0.parquet
#> ├── .created_by = anonymous
#> ├── .created_at = 2025-03-10 17:29:54
#> └── .transform = 'introduction.Rmd'
artifact$cache()
#> [1] "/tmp/RtmpJ8A8HB/laminr-intro-20250310172937/.lamindb/0QKuoYMA4v42mpaH0000.parquet"
dataset <- artifact$open()
as.data.frame(dataset)
#> # A data frame: 3 × 8
#> ENSG00000153563 ENSG00000010610 ENSG00000170458 perturbation sample_note
#> <dbl> <dbl> <dbl> <fct> <chr>
#> 1 1 3 5 DMSO was ok
#> 2 2 4 6 IFNJ looks naah
#> 3 3 5 7 DMSO pretty! 🤩
#> # ℹ 3 more variables: cell_type_by_expert <fct>, cell_type_by_model <fct>,
#> # `__index_level_0__` <chr>
artifact$load()
#> ENSG00000153563 ENSG00000010610 ENSG00000170458 perturbation
#> sample1 1 3 5 DMSO
#> sample2 2 4 6 IFNJ
#> sample3 3 5 7 DMSO
#> sample_note cell_type_by_expert cell_type_by_model
#> sample1 was ok B cell B cell
#> sample2 looks naah T cell T cell
#> sample3 pretty! 🤩 T cell T cell
artifact$view_lineage()
#> ✖ `view_lineage()` is not yet implemented. Please view the lineage in the web interface.
# Keep an untouched copy of the data frame (sample2 still reads "IFNJ");
# it is used later to demonstrate a failing validation.
df_typo <- df
# Replace the factor levels so the corrected spelling "IFNG" is available.
# NOTE(review): assumes the existing levels are ordered DMSO, IFNJ, so that the
# positional replacement renames the misspelled level - confirm.
levels(df$perturbation) <- c("DMSO", "IFNG")
df["sample2", "perturbation"] <- "IFNG"
# Saving under the same key as before registers a new version of the artifact.
artifact <- ln$Artifact$from_df(df, key = "my_datasets/rnaseq1.parquet")$save()
#> → creating new artifact version for key='my_datasets/rnaseq1.parquet' (storage: '/tmp/RtmpJ8A8HB/laminr-intro-20250310172937')
artifact$versions$df()
#> uid key description suffix kind
#> 1 0QKuoYMA4v42mpaH0000 my_datasets/rnaseq1.parquet <NA> .parquet dataset
#> 2 0QKuoYMA4v42mpaH0001 my_datasets/rnaseq1.parquet <NA> .parquet dataset
#> otype size hash n_files n_observations _hash_type
#> 1 DataFrame 6120 cD9NSUzJ3YzfmybCQ4Ab9w <NA> 3 md5
#> 2 DataFrame 6120 O69yLgP32m9XBvvw_7WWxg <NA> 3 md5
#> _key_is_virtual _overwrite_versions space_id storage_id schema_id version
#> 1 TRUE FALSE 1 1 <NA> <NA>
#> 2 TRUE FALSE 1 1 <NA> <NA>
#> is_latest run_id created_at created_by_id _aux _branch_code
#> 1 FALSE 1 2025-03-10 17:29:54 1 <NA> 1
#> 2 TRUE 1 2025-03-10 17:29:56 1 <NA> 1
Labels
See https://docs.lamin.ai/guide#labels.
bt <- import_module("bionty")
# Create a label type ("Experiment") and then a label that belongs to it.
experiment_type <- ln$ULabel(name = "Experiment", is_type = TRUE)$save()
candidate_marker_experiment <- ln$ULabel(
name = "Candidate marker experiment", type = experiment_type
)$save()
# Annotate the artifact with the new label.
artifact$ulabels$add(candidate_marker_experiment)
# Build a CellType record for "effector T cell" with from_source(), save it,
# and attach it to the artifact as well.
cell_type <- bt$CellType$from_source(name = "effector T cell")$save()
artifact$cell_types$add(cell_type)
artifact$describe()
#> Artifact .parquet/DataFrame
#> ├── General
#> │ ├── .uid = '0QKuoYMA4v42mpaH0001'
#> │ ├── .key = 'my_datasets/rnaseq1.parquet'
#> │ ├── .size = 6120
#> │ ├── .hash = 'O69yLgP32m9XBvvw_7WWxg'
#> │ ├── .n_observations = 3
#> │ ├── .path =
#> │ │ /tmp/RtmpJ8A8HB/laminr-intro-20250310172937/.lamindb/0QKuoYMA4v42mpaH000
#> │ │ 1.parquet
#> │ ├── .created_by = anonymous
#> │ ├── .created_at = 2025-03-10 17:29:56
#> │ └── .transform = 'introduction.Rmd'
#> └── Labels
#> └── .cell_types bionty.CellType effector T cell
#> .ulabels ULabel Candidate marker experiment
Registries
See https://docs.lamin.ai/guide#registries.
ln$ULabel$df()
#> uid name is_type description reference
#> 2 nfNRAt9Z Candidate marker experiment FALSE <NA> <NA>
#> 1 ISDFDrug Experiment TRUE <NA> <NA>
#> reference_type space_id type_id run_id created_at created_by_id _aux
#> 2 <NA> 1 1 1 2025-03-10 17:29:56 1 <NA>
#> 1 <NA> 1 NaN 1 2025-03-10 17:29:56 1 <NA>
#> _branch_code
#> 2 1
#> 1 1
ln$Artifact
#> Artifact
#> Simple fields
#> .uid: CharField
#> .key: CharField
#> .description: CharField
#> .suffix: CharField
#> .kind: CharField
#> .otype: CharField
#> .size: BigIntegerField
#> .hash: CharField
#> .n_files: BigIntegerField
#> .n_observations: BigIntegerField
#> .version: CharField
#> .is_latest: BooleanField
#> .created_at: DateTimeField
#> .updated_at: DateTimeField
#> Relational fields
#> .space: Space
#> .storage: Storage
#> .run: Run
#> .schema: Schema
#> .created_by: User
#> .ulabels: ULabel
#> .input_of_runs: Run
#> .feature_sets: Schema
#> .collections: Collection
#> .references: Reference
#> .projects: Project
#> Bionty fields
#> .organisms: bionty.Organism
#> .genes: bionty.Gene
#> .proteins: bionty.Protein
#> .cell_markers: bionty.CellMarker
#> .tissues: bionty.Tissue
#> .cell_types: bionty.CellType
#> .diseases: bionty.Disease
#> .cell_lines: bionty.CellLine
#> .phenotypes: bionty.Phenotype
#> .pathways: bionty.Pathway
#> .experimental_factors: bionty.ExperimentalFactor
#> .developmental_stages: bionty.DevelopmentalStage
#> .ethnicities: bionty.Ethnicity
#> signature: (*args, **kwargs)
Query & search
See https://docs.lamin.ai/guide#query-search.
transform <- ln$Transform$get(key = "introduction.Rmd")
ln$Artifact$filter(key__startswith = "my_datasets/")$df()
#> uid key description suffix kind
#> 1 0QKuoYMA4v42mpaH0000 my_datasets/rnaseq1.parquet <NA> .parquet dataset
#> 2 0QKuoYMA4v42mpaH0001 my_datasets/rnaseq1.parquet <NA> .parquet dataset
#> otype size hash n_files n_observations _hash_type
#> 1 DataFrame 6120 cD9NSUzJ3YzfmybCQ4Ab9w <NA> 3 md5
#> 2 DataFrame 6120 O69yLgP32m9XBvvw_7WWxg <NA> 3 md5
#> _key_is_virtual _overwrite_versions space_id storage_id schema_id version
#> 1 TRUE FALSE 1 1 <NA> <NA>
#> 2 TRUE FALSE 1 1 <NA> <NA>
#> is_latest run_id created_at created_by_id _aux _branch_code
#> 1 FALSE 1 2025-03-10 17:29:54 1 <NA> 1
#> 2 TRUE 1 2025-03-10 17:29:56 1 <NA> 1
artifacts <- ln$Artifact$filter(transform = transform)$all()
artifacts <- ln$Artifact$filter(
transform__description__icontains = "intro", ulabels = candidate_marker_experiment
)$all()
ln$Transform$search("intro")$df()
#> uid key description type source_code hash
#> 1 k0fqNQTwFlA70000 introduction.Rmd introduction.Rmd notebook <NA> <NA>
#> reference reference_type space_id _template_id version is_latest
#> 1 <NA> <NA> 1 <NA> <NA> TRUE
#> created_at created_by_id _aux _branch_code
#> 1 2025-03-10 17:29:53 1 <NA> 1
ulabels <- ln$ULabel$lookup()
cell_types <- bt$CellType$lookup()
Features
See https://docs.lamin.ai/guide#features.
ln$Feature(name = "temperature", dtype = "float")$save()
#> Feature(uid='Y51YJDN3oiJg', name='temperature', dtype='float', array_rank=0, array_size=0, space_id=1, created_by_id=1, run_id=1, created_at=2025-03-10 17:29:58 UTC)
ln$Feature(name = "experiment", dtype = ln$ULabel)$save()
#> Feature(uid='fQ9c1yaruwJu', name='experiment', dtype='cat[ULabel]', array_rank=0, array_size=0, space_id=1, created_by_id=1, run_id=1, created_at=2025-03-10 17:29:58 UTC)
artifact$features$add_values(
list("temperature" = 21.6, "experiment" = "Candidate marker experiment")
)
artifact$describe()
#> Artifact .parquet/DataFrame
#> ├── General
#> │ ├── .uid = '0QKuoYMA4v42mpaH0001'
#> │ ├── .key = 'my_datasets/rnaseq1.parquet'
#> │ ├── .size = 6120
#> │ ├── .hash = 'O69yLgP32m9XBvvw_7WWxg'
#> │ ├── .n_observations = 3
#> │ ├── .path =
#> │ │ /tmp/RtmpJ8A8HB/laminr-intro-20250310172937/.lamindb/0QKuoYMA4v42mpaH000
#> │ │ 1.parquet
#> │ ├── .created_by = anonymous
#> │ ├── .created_at = 2025-03-10 17:29:56
#> │ └── .transform = 'introduction.Rmd'
#> ├── Linked features
#> │ └── experiment cat[ULabel] Candidate marker experiment
#> │ temperature float 21.6
#> └── Labels
#> └── .cell_types bionty.CellType effector T cell
#> .ulabels ULabel Candidate marker experiment
ln$Artifact$features$filter(experiment__contains = "marker experiment")$df()
#> uid key description suffix kind
#> 2 0QKuoYMA4v42mpaH0001 my_datasets/rnaseq1.parquet <NA> .parquet dataset
#> otype size hash n_files n_observations _hash_type
#> 2 DataFrame 6120 O69yLgP32m9XBvvw_7WWxg <NA> 3 md5
#> _key_is_virtual _overwrite_versions space_id storage_id schema_id version
#> 2 TRUE FALSE 1 1 <NA> <NA>
#> is_latest run_id created_at created_by_id _aux _branch_code
#> 2 TRUE 1 2025-03-10 17:29:56 1 <NA> 1
Key use cases
See https://docs.lamin.ai/guide#key-use-cases.
Understand data lineage
See https://docs.lamin.ai/guide#understand-data-lineage.
artifact$view_lineage()
#> ✖ `view_lineage()` is not yet implemented. Please view the lineage in the web interface.
transform$view_lineage()
#> ✖ `view_lineage()` is not yet implemented. Please view the lineage in the web interface.
# Example only, not run
ln <- import_module("lamindb")
ln$track()
ln$finish()
# lamin load https://lamin.ai/laminlabs/lamindata/transform/13VINnFk89PE0004
Curate datasets
See https://docs.lamin.ai/introduction#curate-datasets.
perturbation_type <- ln$ULabel(name = "Perturbation", is_type = TRUE)$save()
ln$ULabel(name = "DMSO", type = perturbation_type)$save()
#> ULabel(uid='57q8hOBE', name='DMSO', is_type=False, space_id=1, created_by_id=1, run_id=1, type_id=3, created_at=2025-03-10 17:29:59 UTC)
ln$ULabel(name = "IFNG", type = perturbation_type)$save()
#> ULabel(uid='Q3ZqXruz', name='IFNG', is_type=False, space_id=1, created_by_id=1, run_id=1, type_id=3, created_at=2025-03-10 17:29:59 UTC)
# Load Python built ins to get access to dtypes
py_builtins <- reticulate::import_builtins()
schema <- ln$Schema(
name = "My DataFrame schema",
features = list(
# NOTE: These have dtype=int in the original guide
ln$Feature(name = "ENSG00000153563", dtype = py_builtins$float)$save(),
ln$Feature(name = "ENSG00000010610", dtype = py_builtins$float)$save(),
ln$Feature(name = "ENSG00000170458", dtype = py_builtins$float)$save(),
ln$Feature(name = "perturbation", dtype = ln$ULabel)$save()
)
)$save()
curator <- ln$curators$DataFrameCurator(df, schema)
artifact <- curator$save_artifact(key = "my_curated_dataset.parquet")
#> ✓ "perturbation" is validated against ULabel.name
#> → returning existing artifact with same hash: Artifact(uid='0QKuoYMA4v42mpaH0001', is_latest=True, key='my_datasets/rnaseq1.parquet', suffix='.parquet', kind='dataset', otype='DataFrame', size=6120, hash='O69yLgP32m9XBvvw_7WWxg', n_observations=3, space_id=1, storage_id=1, run_id=1, created_by_id=1, created_at=2025-03-10 17:29:56 UTC); to track this artifact as an input, use: ln.Artifact.get()
#> ! key my_datasets/rnaseq1.parquet on existing artifact differs from passed key my_curated_dataset.parquet
#> ✓ 4 unique terms (57.10%) are validated for name
#> ! 3 unique terms (42.90%) are not validated for name: 'sample_note', 'cell_type_by_expert', 'cell_type_by_model'
#> ✓ loaded 4 Feature records matching name: 'ENSG00000153563', 'ENSG00000010610', 'ENSG00000170458', 'perturbation'
#> ! did not create Feature records for 3 non-validated names: 'cell_type_by_expert', 'cell_type_by_model', 'sample_note'
#> → returning existing schema with same hash: Schema(uid='A2G3AnSXsO60eoiHj6d0', name='My DataFrame schema', n=4, itype='Feature', is_type=False, hash='cMIAATRcLJccyj67Jp5Jsw', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=1, created_at=2025-03-10 17:29:59 UTC)
#> ! updated otype from None to DataFrame
artifact$describe()
#> Artifact .parquet/DataFrame
#> ├── General
#> │ ├── .uid = '0QKuoYMA4v42mpaH0001'
#> │ ├── .key = 'my_datasets/rnaseq1.parquet'
#> │ ├── .size = 6120
#> │ ├── .hash = 'O69yLgP32m9XBvvw_7WWxg'
#> │ ├── .n_observations = 3
#> │ ├── .path =
#> │ │ /tmp/RtmpJ8A8HB/laminr-intro-20250310172937/.lamindb/0QKuoYMA4v42mpaH000
#> │ │ 1.parquet
#> │ ├── .created_by = anonymous
#> │ ├── .created_at = 2025-03-10 17:29:56
#> │ └── .transform = 'introduction.Rmd'
#> ├── Dataset features/.feature_sets
#> │ └── columns • 4 [Feature]
#> │ perturbation cat[ULabel] DMSO, IFNG
#> │ ENSG00000153563 float
#> │ ENSG00000010610 float
#> │ ENSG00000170458 float
#> ├── Linked features
#> │ └── experiment cat[ULabel] Candidate marker experiment
#> │ temperature float 21.6
#> └── Labels
#> └── .cell_types bionty.CellType effector T cell
#> .ulabels ULabel Candidate marker experiment, DMS…
ln$Artifact$get(ulabels__name = "IFNG")
#> Artifact(uid='0QKuoYMA4v42mpaH0001', is_latest=True, key='my_datasets/rnaseq1.parquet', suffix='.parquet', kind='dataset', otype='DataFrame', size=6120, hash='O69yLgP32m9XBvvw_7WWxg', n_observations=3, space_id=1, storage_id=1, run_id=1, schema_id=1, created_by_id=1, created_at=2025-03-10 17:29:56 UTC)
curator <- ln$curators$DataFrameCurator(df_typo, schema)
# Validate the curator; if validation raises an error, print its message
# rather than letting the error propagate.
tryCatch(
  curator$validate(),
  error = \(cnd) cat(conditionMessage(cnd))
)
#> • mapping "perturbation" on ULabel.name
#> ! 1 term is not validated: 'IFNJ'
#> → fix typos, remove non-existent values, or save terms via .add_new_from("perturbation")
#> lamindb.errors.ValidationError: 1 term is not validated: 'IFNJ'
#> → fix typos, remove non-existent values, or save terms via .add_new_from("perturbation")
#> Run `reticulate::py_last_error()` for details.
Manage biological registries
See https://docs.lamin.ai/introduction#manage-biological-registries.
cell_types <- bt$CellType$public()
cell_types
#> PublicOntology
#> Entity: CellType
#> Organism: all
#> Source: cl, 2024-08-16
#> #terms: 2959
cell_types$search("gamma-delta T cell") |> head(2)
#> name
#> CL:0000798 gamma-delta T cell
#> CL:4033072 cycling gamma-delta T cell
#> definition
#> CL:0000798 A T Cell That Expresses A Gamma-Delta T Cell Receptor Complex.
#> CL:4033072 A(N) Gamma-Delta T Cell That Is Cycling.
#> synonyms
#> CL:0000798 gamma-delta T-cell|gamma-delta T lymphocyte|gammadelta T cell|gamma-delta T-lymphocyte
#> CL:4033072 proliferating gamma-delta T cell
#> parents
#> CL:0000798 CL:0000084
#> CL:4033072 CL:4033069, CL:0000798
var_schema <- ln$Schema(
name = "my_var_schema",
itype = bt$Gene$ensembl_gene_id,
dtype = py_builtins$float
)$save()
obs_schema <- ln$Schema(
name = "my_obs_schema",
features = list(
ln$Feature(name = "perturbation", dtype = ln$ULabel)$save()
)
)$save()
#> → returning existing Feature record with same name: 'perturbation'
anndata_schema <- ln$Schema(
name = "my_anndata_schema",
otype = "AnnData",
components = list("obs" = obs_schema, "var" = var_schema)
)$save()
library(anndata)
# Build an AnnData object from the data frame: the three gene columns are
# passed as the data matrix, and `perturbation` is supplied as `obs`.
# `drop = FALSE` keeps `obs` a one-column data frame instead of a vector.
adata <- AnnData(
df[c("ENSG00000153563", "ENSG00000010610", "ENSG00000170458")],
obs = df[, "perturbation", drop = FALSE]
)
curator <- ln$curators$AnnDataCurator(adata, anndata_schema)
#> • saving validated records of 'columns'
#> ✓ added 3 records from public with Gene.ensembl_gene_id for "columns": 'ENSG00000170458', 'ENSG00000153563', 'ENSG00000010610'
artifact <- curator$save_artifact(description = "my RNA-seq")
#> ✓ "perturbation" is validated against ULabel.name
#> • path content will be copied to default storage upon `save()` with key `None` ('.lamindb/7Ci5odXeEN4yuGPW0000.h5ad')
#> ✓ storing artifact '7Ci5odXeEN4yuGPW0000' at '/tmp/RtmpJ8A8HB/laminr-intro-20250310172937/.lamindb/7Ci5odXeEN4yuGPW0000.h5ad'
#> • parsing feature names of X stored in slot 'var'
#> ✓ 3 unique terms (100.00%) are validated for ensembl_gene_id
#> ✓ linked: Schema(uid='8JKccgcJ5p9uSew4vYPb', n=3, dtype='float', itype='bionty.Gene', is_type=False, hash='f2UVeHefaZxXFjmUwo9Ozw', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=1, created_at=<django.db.models.expressions.DatabaseDefault object at 0x7fe5e87e72c0>)
#> • parsing feature names of slot 'obs'
#> ✓ 1 unique term (100.00%) is validated for name
#> → returning existing schema with same hash: Schema(uid='oZW5uuU1TIWfXXKNz6hB', name='my_obs_schema', n=1, itype='Feature', is_type=False, hash='nHGWVy7t0lz0LQS6lpcZnA', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=1, created_at=2025-03-10 17:30:00 UTC)
#> ! updated otype from None to DataFrame
#> ✓ linked: Schema(uid='oZW5uuU1TIWfXXKNz6hB', name='my_obs_schema', n=1, itype='Feature', is_type=False, otype='DataFrame', hash='nHGWVy7t0lz0LQS6lpcZnA', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=1, created_at=2025-03-10 17:30:00 UTC)
#> ✓ saved 1 feature set for slot: 'var'
artifact$describe()
#> Artifact .h5ad/AnnData
#> ├── General
#> │ ├── .uid = '7Ci5odXeEN4yuGPW0000'
#> │ ├── .size = 19240
#> │ ├── .hash = 'gO44MDqttaaKNyBLVM-zzA'
#> │ ├── .n_observations = 3
#> │ ├── .path =
#> │ │ /tmp/RtmpJ8A8HB/laminr-intro-20250310172937/.lamindb/7Ci5odXeEN4yuGPW000
#> │ │ 0.h5ad
#> │ ├── .created_by = anonymous
#> │ ├── .created_at = 2025-03-10 17:30:02
#> │ └── .transform = 'introduction.Rmd'
#> ├── Dataset features/.feature_sets
#> │ ├── var • 3 [bionty.Gene]
#> │ │ CD14 float
#> │ │ CD8A float
#> │ │ CD4 float
#> │ └── obs • 1 [Feature]
#> │ perturbation cat[ULabel] DMSO, IFNG
#> └── Labels
#> └── .ulabels ULabel DMSO, IFNG
# Look up human genes, then find the schemas (feature sets) that contain CD8A
# and list the artifacts linked to any of them.
# The Artifact registry's `feature_sets` relation points at Schema records, so
# the Schema registry is queried directly, matching the other chunks in this
# vignette that use `ln$Schema`.
genes <- bt$Gene$filter(organism__name = "human")$lookup()
feature_sets <- ln$Schema$filter(genes = genes$cd8a)$all()
ln$Artifact$filter(feature_sets__in = feature_sets)$df()
#> uid key description suffix kind otype size
#> 3 7Ci5odXeEN4yuGPW0000 <NA> my RNA-seq .h5ad dataset AnnData 19240
#> hash n_files n_observations _hash_type _key_is_virtual
#> 3 gO44MDqttaaKNyBLVM-zzA <NA> 3 md5 TRUE
#> _overwrite_versions space_id storage_id schema_id version is_latest run_id
#> 3 FALSE 1 1 4 <NA> TRUE 1
#> created_at created_by_id _aux _branch_code
#> 3 2025-03-10 17:30:02 1 <NA> 1
neuron <- bt$CellType$from_source(name = "neuron")$save()
#> ✓ created 1 CellType record from Bionty matching name: 'neuron'
#> ✓ created 3 CellType records from Bionty matching ontology_id: 'CL:0002319', 'CL:0000404', 'CL:0000393'
new_cell_state <- bt$CellType(
name = "my neuron cell state", description = "explains X"
)$save()
new_cell_state$parents$add(neuron)
new_cell_state$view_parents(distance = 2)
Scale learning
See https://docs.lamin.ai/introduction#scale-learning.
df2 <- ln$core$datasets$small_dataset2(otype = "DataFrame")
adata <- AnnData(
df2[c("ENSG00000153563", "ENSG00000010610", "ENSG00000004468")],
obs = df2[, "perturbation", drop = FALSE]
)
curator <- ln$curators$AnnDataCurator(adata, anndata_schema)
#> • saving validated records of 'columns'
#> ✓ added 1 record from public with Gene.ensembl_gene_id for "columns": 'ENSG00000004468'
artifact2 <- curator$save_artifact(key = "my_datasets/my_rnaseq2.h5ad")
#> ✓ "perturbation" is validated against ULabel.name
#> • path content will be copied to default storage upon `save()` with key 'my_datasets/my_rnaseq2.h5ad'
#> ✓ storing artifact 'j3mcBo5sbvXaNHAK0000' at '/tmp/RtmpJ8A8HB/laminr-intro-20250310172937/.lamindb/j3mcBo5sbvXaNHAK0000.h5ad'
#> • parsing feature names of X stored in slot 'var'
#> ✓ 3 unique terms (100.00%) are validated for ensembl_gene_id
#> ✓ linked: Schema(uid='3Zt0X3MCIbPw7mioNAhm', n=3, dtype='float', itype='bionty.Gene', is_type=False, hash='QW2rHuIo5-eGNZbRxHMDCw', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=1, created_at=<django.db.models.expressions.DatabaseDefault object at 0x7fe5e88c6b40>)
#> • parsing feature names of slot 'obs'
#> ✓ 1 unique term (100.00%) is validated for name
#> → returning existing schema with same hash: Schema(uid='oZW5uuU1TIWfXXKNz6hB', name='my_obs_schema', n=1, itype='Feature', is_type=False, hash='nHGWVy7t0lz0LQS6lpcZnA', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=1, created_at=2025-03-10 17:30:00 UTC)
#> ! updated otype from None to DataFrame
#> ✓ linked: Schema(uid='oZW5uuU1TIWfXXKNz6hB', name='my_obs_schema', n=1, itype='Feature', is_type=False, otype='DataFrame', hash='nHGWVy7t0lz0LQS6lpcZnA', minimal_set=True, ordered_set=False, maximal_set=False, space_id=1, created_by_id=1, run_id=1, created_at=2025-03-10 17:30:00 UTC)
#> ✓ saved 1 feature set for slot: 'var'
collection <- ln$Collection(
list(artifact, artifact2),
key = "my-RNA-seq-collection"
)$save()
collection$describe()
#> Collection
#> └── General
#> ├── .uid = 'vL9IW55WPb9VoGOz0000'
#> ├── .key = 'my-RNA-seq-collection'
#> ├── .hash = 'DbgO9hDdS-KOydwDZDLp3g'
#> ├── .created_by = anonymous
#> ├── .created_at = 2025-03-10 17:30:05
#> └── .transform = 'introduction.Rmd'
collection$view_lineage()
#> ✖ `view_lineage()` is not yet implemented. Please view the lineage in the web interface.
collection$load()
#> AnnData object with n_obs × n_vars = 6 × 4
#> obs: 'perturbation', 'artifact_uid'
collection$artifacts$all()
#> <QuerySet [Artifact(uid='7Ci5odXeEN4yuGPW0000', is_latest=True, description='my RNA-seq', suffix='.h5ad', kind='dataset', otype='AnnData', size=19240, hash='gO44MDqttaaKNyBLVM-zzA', n_observations=3, space_id=1, storage_id=1, run_id=1, schema_id=4, created_by_id=1, created_at=2025-03-10 17:30:02 UTC), Artifact(uid='j3mcBo5sbvXaNHAK0000', is_latest=True, key='my_datasets/my_rnaseq2.h5ad', suffix='.h5ad', kind='dataset', otype='AnnData', size=19240, hash='Ti7bcnIOlk0fIt2TFW_VAw', n_observations=3, space_id=1, storage_id=1, run_id=1, schema_id=4, created_by_id=1, created_at=2025-03-10 17:30:05 UTC)]>
collection$artifacts$df()
#> uid key description suffix kind
#> 3 7Ci5odXeEN4yuGPW0000 <NA> my RNA-seq .h5ad dataset
#> 4 j3mcBo5sbvXaNHAK0000 my_datasets/my_rnaseq2.h5ad <NA> .h5ad dataset
#> otype size hash n_files n_observations _hash_type
#> 3 AnnData 19240 gO44MDqttaaKNyBLVM-zzA <NA> 3 md5
#> 4 AnnData 19240 Ti7bcnIOlk0fIt2TFW_VAw <NA> 3 md5
#> _key_is_virtual _overwrite_versions space_id storage_id schema_id version
#> 3 TRUE FALSE 1 1 4 <NA>
#> 4 TRUE FALSE 1 1 4 <NA>
#> is_latest run_id created_at created_by_id _aux _branch_code
#> 3 TRUE 1 2025-03-10 17:30:02 1 <NA> 1
#> 4 TRUE 1 2025-03-10 17:30:05 1 <NA> 1
# This example might be beyond the scope of {laminr}
from torch.utils.data import DataLoader, WeightedRandomSampler

# Map the collection into a dataset, requesting the "perturbation" obs key.
dataset = collection.mapped(obs_keys=["perturbation"])
# Draw len(dataset) samples, weighted by the dataset's "perturbation" label weights.
sampler = WeightedRandomSampler(
    weights=dataset.get_label_weights("perturbation"), num_samples=len(dataset)
)
data_loader = DataLoader(dataset, batch_size=2, sampler=sampler)
# Iterate over the batches once; the loop body is intentionally empty.
for batch in data_loader:
    pass
Design
See https://docs.lamin.ai/introduction#design.
Influences
See https://docs.lamin.ai/introduction#influences.
ln$finish()
#> ! no html report found; to attach one, create an .html export for your .Rmd file and then run: lamin save introduction.Rmd
#> → finished Run('uucVQFwb') after 12s at 2025-03-10 17:30:06 UTC
#> ! calling anonymously, will miss private instances