Sample n rows from a table — sample_n • tidySpatialExperiment

sample_n() and sample_frac() have been superseded in favour of slice_sample(). While they will not be deprecated in the near future, retirement means that we will only perform critical bug fixes, so we recommend moving to the newer alternative.

These functions were superseded because we realised it was more convenient to have two mutually exclusive arguments to one function, rather than two separate functions. This also made it possible to clean up a few other smaller design issues with sample_n()/sample_frac():

The connection to slice() was not obvious.
The name of the first argument, tbl, is inconsistent with other single table verbs which use .data.
The size argument uses tidy evaluation, which is surprising and undocumented.
It was easier to remove the deprecated .env argument.
... was in a suboptimal position.

# S3 method for class 'SpatialExperiment'
sample_n(tbl, size, replace = FALSE, weight = NULL, .env = NULL, ...)

# S3 method for class 'SpatialExperiment'
sample_frac(tbl, size = 1, replace = FALSE, weight = NULL, .env = NULL, ...)

Arguments

tbl: A data.frame; for the methods documented here, a SpatialExperiment object.
size: <tidy-select> For sample_n(), the number of rows to select. For sample_frac(), the fraction of rows to select. If tbl is grouped, size applies to each group.
replace: Sample with or without replacement?
weight: <tidy-select> Sampling weights. This must evaluate to a vector of non-negative numbers the same length as the input. Weights are automatically standardised to sum to 1.
.env: DEPRECATED.
...: ignored

Value

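tidySpatialExperiment: a SpatialExperiment object, printed as a SpatialExperiment-tibble abstraction. As the examples below show, a tibble (data frame) is returned instead when rows are sampled with replacement.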

Examples

example(read10xVisium)
#> 
#> rd10xV> dir <- system.file(
#> rd10xV+   file.path("extdata", "10xVisium"), 
#> rd10xV+   package = "SpatialExperiment")
#> 
#> rd10xV> sample_ids <- c("section1", "section2")
#> 
#> rd10xV> samples <- file.path(dir, sample_ids, "outs")
#> 
#> rd10xV> list.files(samples[1])
#> [1] "raw_feature_bc_matrix" "spatial"              
#> 
#> rd10xV> list.files(file.path(samples[1], "spatial"))
#> [1] "scalefactors_json.json"    "tissue_lowres_image.png"  
#> [3] "tissue_positions_list.csv"
#> 
#> rd10xV> file.path(samples[1], "raw_feature_bc_matrix")
#> [1] "/home/runner/work/_temp/Library/SpatialExperiment/extdata/10xVisium/section1/outs/raw_feature_bc_matrix"
#> 
#> rd10xV> (spe <- read10xVisium(samples, sample_ids, 
#> rd10xV+   type = "sparse", data = "raw", 
#> rd10xV+   images = "lowres", load = FALSE))
#> Warning: 'read10xVisium' is deprecated.
#> Use 'VisiumIO::TENxVisium(List)' instead.
#> See help("Deprecated")
#> # A SpatialExperiment-tibble abstraction: 99 × 7
#> # Features = 50 | Cells = 99 | Assays = counts
#>    .cell              in_tissue array_row array_col sample_id pxl_col_in_fullres
#>    <chr>              <lgl>         <int>     <int> <chr>                  <int>
#>  1 AAACAACGAATAGTTC-1 FALSE             0        16 section1                2312
#>  2 AAACAAGTATCTCCCA-1 TRUE             50       102 section1                8230
#>  3 AAACAATCTACTAGCA-1 TRUE              3        43 section1                4170
#>  4 AAACACCAATAACTGC-1 TRUE             59        19 section1                2519
#>  5 AAACAGAGCGACTCCT-1 TRUE             14        94 section1                7679
#>  6 AAACAGCTTTCAGAAG-1 FALSE            43         9 section1                1831
#>  7 AAACAGGGTCTATATT-1 FALSE            47        13 section1                2106
#>  8 AAACAGTGTTCCTGGG-1 FALSE            73        43 section1                4170
#>  9 AAACATGGTGAGAGGA-1 FALSE            62         0 section1                1212
#> 10 AAACATTTCCCGGATT-1 FALSE            61        97 section1                7886
#> # ℹ 89 more rows
#> # ℹ 1 more variable: pxl_row_in_fullres <int>
#> 
#> rd10xV> # base directory 'outs/' from Space Ranger can also be omitted
#> rd10xV> samples2 <- file.path(dir, sample_ids)
#> 
#> rd10xV> (spe2 <- read10xVisium(samples2, sample_ids, 
#> rd10xV+   type = "sparse", data = "raw", 
#> rd10xV+   images = "lowres", load = FALSE))
#> Warning: 'read10xVisium' is deprecated.
#> Use 'VisiumIO::TENxVisium(List)' instead.
#> See help("Deprecated")
#> # A SpatialExperiment-tibble abstraction: 99 × 7
#> # Features = 50 | Cells = 99 | Assays = counts
#>    .cell              in_tissue array_row array_col sample_id pxl_col_in_fullres
#>    <chr>              <lgl>         <int>     <int> <chr>                  <int>
#>  1 AAACAACGAATAGTTC-1 FALSE             0        16 section1                2312
#>  2 AAACAAGTATCTCCCA-1 TRUE             50       102 section1                8230
#>  3 AAACAATCTACTAGCA-1 TRUE              3        43 section1                4170
#>  4 AAACACCAATAACTGC-1 TRUE             59        19 section1                2519
#>  5 AAACAGAGCGACTCCT-1 TRUE             14        94 section1                7679
#>  6 AAACAGCTTTCAGAAG-1 FALSE            43         9 section1                1831
#>  7 AAACAGGGTCTATATT-1 FALSE            47        13 section1                2106
#>  8 AAACAGTGTTCCTGGG-1 FALSE            73        43 section1                4170
#>  9 AAACATGGTGAGAGGA-1 FALSE            62         0 section1                1212
#> 10 AAACATTTCCCGGATT-1 FALSE            61        97 section1                7886
#> # ℹ 89 more rows
#> # ℹ 1 more variable: pxl_row_in_fullres <int>
#> 
#> rd10xV> # tabulate number of spots mapped to tissue
#> rd10xV> cd <- colData(spe)
#> 
#> rd10xV> table(
#> rd10xV+   in_tissue = cd$in_tissue, 
#> rd10xV+   sample_id = cd$sample_id)
#>          sample_id
#> in_tissue section1 section2
#>     FALSE       28       27
#>     TRUE        22       22
#> 
#> rd10xV> # view available images
#> rd10xV> imgData(spe)
#> DataFrame with 2 rows and 4 columns
#>     sample_id    image_id   data scaleFactor
#>   <character> <character> <list>   <numeric>
#> 1    section1      lowres   ####   0.0510334
#> 2    section2      lowres   ####   0.0510334
spe |>
    sample_n(10)
#> # A SpatialExperiment-tibble abstraction: 10 × 7
#> # Features = 50 | Cells = 10 | Assays = counts
#>    .cell              in_tissue array_row array_col sample_id pxl_col_in_fullres
#>    <chr>              <lgl>         <int>     <int> <chr>                  <int>
#>  1 AAACCTAAGCAGCCGG-1 FALSE            65        83 section1                6922
#>  2 AAACGGGTTGGTATCC-1 FALSE             1        23 section1                2794
#>  3 AAAGACCCAAGTCGCG-1 TRUE             10        48 section1                4514
#>  4 AAACAGTGTTCCTGGG-1 FALSE            73        43 section1                4170
#>  5 AAACTTGCAAACGTAT-1 TRUE             45        19 section1                2519
#>  6 AAAGAATGACCTTAGA-1 FALSE            64         2 section1                1349
#>  7 AAAGGGATGTAGCAAG-1 TRUE             24        62 section1                5477
#>  8 AAACTAACGTGGCGAC-1 FALSE             8       110 section1                8780
#>  9 AAAGTCACTGATGTAA-1 TRUE             10        52 section1                4789
#> 10 AAACGGTTGCGAACTG-1 TRUE             67        59 section1                5271
#> # ℹ 1 more variable: pxl_row_in_fullres <int>
spe |>
    sample_frac(0.1)
#> tidySpatialExperiment says: When sampling with replacement a data frame is returned 
#>             for independent data analysis.
#> # A tibble: 20 × 7
#>    .cell              in_tissue array_row array_col sample_id pxl_col_in_fullres
#>    <chr>              <lgl>         <int>     <int> <chr>                  <int>
#>  1 AAACAAGTATCTCCCA-1 TRUE             50       102 section1                8230
#>  2 AAACAATCTACTAGCA-1 TRUE              3        43 section1                4170
#>  3 AAACAGCTTTCAGAAG-1 FALSE            43         9 section1                1831
#>  4 AAACCTAAGCAGCCGG-1 FALSE            65        83 section1                6922
#>  5 AAACGAAGATGGAGTA-1 FALSE            58         4 section1                1487
#>  6 AAACGCCCGAGATCGG-1 FALSE             4       108 section1                8642
#>  7 AAAGAATGTGGACTAA-1 FALSE            71       105 section1                8436
#>  8 AAAGACTGGGCGCTTT-1 FALSE            29        15 section1                2243
#>  9 AAAGACTGGGCGCTTT-1 FALSE            29        15 section1                2243
#> 10 AAAGGCTACGGACCAT-1 TRUE             62        54 section1                4927
#> 11 AAACAAGTATCTCCCA-1 TRUE             50       102 section2                8230
#> 12 AAACAATCTACTAGCA-1 TRUE              3        43 section2                4170
#> 13 AAACAGCTTTCAGAAG-1 FALSE            43         9 section2                1831
#> 14 AAACCTAAGCAGCCGG-1 FALSE            65        83 section2                6922
#> 15 AAACGAAGATGGAGTA-1 FALSE            58         4 section2                1487
#> 16 AAACGCCCGAGATCGG-1 FALSE             4       108 section2                8642
#> 17 AAAGAATGTGGACTAA-1 FALSE            71       105 section2                8436
#> 18 AAAGACTGGGCGCTTT-1 FALSE            29        15 section2                2243
#> 19 AAAGACTGGGCGCTTT-1 FALSE            29        15 section2                2243
#> 20 AAAGGCTACGGACCAT-1 TRUE             62        54 section2                4927
#> # ℹ 1 more variable: pxl_row_in_fullres <int>