A tibble of simulated biomarker measurements with missing entries.
Each row corresponds to one observation (identified by the `index` column),
and the remaining
columns are the measured biomarker values, some of which are set to NA to
demonstrate imputation workflows.
Format
A tibble with 8,000 rows and 31 variables:
- index
Numeric (integer-valued, 0 to 7999). Row identifier imported from
`data_raw/df_missing.csv`.
- Age, Salary, ZipCode10001, ZipCode20002, ZipCode30003
Demographic columns. Omit these columns when selecting the validation set. They contain no missing values.
- Y11, ..., Y55
Simulated biomarker columns (25 in total: Y11-Y15, Y21-Y25, ..., Y51-Y55). These columns contain missing values (NA).
Examples
data(df_missing)
str(df_missing)
#> spc_tbl_ [8,000 × 31] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
#> $ index : num [1:8000] 0 1 2 3 4 5 6 7 8 9 ...
#> $ Age : num [1:8000] 11.04 9.73 11.38 13.56 9.54 ...
#> $ Salary : num [1:8000] 6.37 5.91 6.64 5.9 6.13 ...
#> $ ZipCode10001: num [1:8000] 0 1 0 0 1 1 1 0 0 0 ...
#> $ ZipCode20002: num [1:8000] 1 0 1 0 0 0 0 1 1 0 ...
#> $ ZipCode30003: num [1:8000] 0 0 0 1 0 0 0 0 0 1 ...
#> $ Y11 : num [1:8000] -4.05 0.546 NA -10.608 0.358 ...
#> $ Y12 : num [1:8000] NA NA NA NA -16.5 ...
#> $ Y13 : num [1:8000] NA -12.2 -20.4 NA -11.3 ...
#> $ Y14 : num [1:8000] -14.37 -7.72 -15.13 -14.21 NA ...
#> $ Y15 : num [1:8000] -17.6 NA -17.3 -21.3 NA ...
#> $ Y21 : num [1:8000] NA -7.47 -18.45 -21.97 -7.58 ...
#> $ Y22 : num [1:8000] NA NA NA NA -27.6 ...
#> $ Y23 : num [1:8000] -35.8 -25.9 -34.4 -40.2 NA ...
#> $ Y24 : num [1:8000] -28.1 -17.2 -27.3 -26.3 NA ...
#> $ Y25 : num [1:8000] -30.2 -18.7 -28.8 -33.4 -18.6 ...
#> $ Y31 : num [1:8000] -1.63 4.36 -2.17 -7.48 8.1 ...
#> $ Y32 : num [1:8000] NA NA NA NA -13.6 ...
#> $ Y33 : num [1:8000] -16.77 -10.93 -17.19 -25.31 -9.83 ...
#> $ Y34 : num [1:8000] -10.69 -5.89 -10.49 NA NA ...
#> $ Y35 : num [1:8000] -13.9 -6.09 -12.29 -15.43 -2.96 ...
#> $ Y41 : num [1:8000] -0.905 2.625 NA -2.825 3.617 ...
#> $ Y42 : num [1:8000] NA NA NA NA -4.62 ...
#> $ Y43 : num [1:8000] NA -5.78 -7.22 -8.29 -3.86 ...
#> $ Y44 : num [1:8000] -3.69 -1.38 -3.35 -2.4 NA ...
#> $ Y45 : num [1:8000] -5.68 -2.33 -6.9 NA -1.5 ...
#> $ Y51 : num [1:8000] 2.588 6.081 2.531 0.139 NA ...
#> $ Y52 : num [1:8000] NA NA NA NA -3.34 ...
#> $ Y53 : num [1:8000] -4.68 -2.29 -5.43 -5.73 -1.92 ...
#> $ Y54 : num [1:8000] -2.248 -0.887 -1.33 -1.64 NA ...
#> $ Y55 : num [1:8000] -2.679 0.563 -2.324 -4.446 0.103 ...
#> - attr(*, "spec")=List of 3
#> ..$ cols :List of 31
#> .. ..$ ...1 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Age : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Salary : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ ZipCode10001: list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ ZipCode20002: list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ ZipCode30003: list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Y11 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Y12 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Y13 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Y14 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Y15 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Y21 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Y22 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Y23 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Y24 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Y25 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Y31 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Y32 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Y33 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Y34 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Y35 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Y41 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Y42 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Y43 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Y44 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Y45 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Y51 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Y52 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Y53 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Y54 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Y55 : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> ..$ default: list()
#> .. ..- attr(*, "class")= chr [1:2] "collector_guess" "collector"
#> ..$ delim : chr ","
#> ..- attr(*, "class")= chr "col_spec"
#> - attr(*, "problems")=<externalptr>
summary(df_missing)
#> index Age Salary ZipCode10001
#> Min. : 0 Min. : 4.782 Min. :5.000 Min. :0.0000
#> 1st Qu.:2000 1st Qu.: 8.732 1st Qu.:5.338 1st Qu.:0.0000
#> Median :4000 Median : 9.986 Median :5.700 Median :0.0000
#> Mean :4000 Mean :10.198 Mean :5.819 Mean :0.3285
#> 3rd Qu.:5999 3rd Qu.:11.440 3rd Qu.:6.169 3rd Qu.:1.0000
#> Max. :7999 Max. :21.929 Max. :8.959 Max. :1.0000
#>
#> ZipCode20002 ZipCode30003 Y11 Y12
#> Min. :0.0000 Min. :0.0000 Min. :-66.3324 Min. :-66.84
#> 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:-11.1297 1st Qu.:-22.37
#> Median :0.0000 Median :0.0000 Median : -0.4072 Median : 48.60
#> Mean :0.3371 Mean :0.3344 Mean : -2.8224 Mean : 22.23
#> 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.: 7.2439 3rd Qu.: 58.35
#> Max. :1.0000 Max. :1.0000 Max. : 26.2178 Max. :112.32
#> NA's :3122 NA's :3118
#> Y13 Y14 Y15 Y21
#> Min. :-74.61 Min. :-49.70 Min. :-46.87 Min. :-78.8702
#> 1st Qu.:-17.25 1st Qu.:-11.98 1st Qu.:-11.56 1st Qu.:-22.0992
#> Median : 72.43 Median : 69.08 Median : 72.28 Median : -9.4794
#> Mean : 42.75 Mean : 55.55 Mean : 63.77 Mean :-12.2318
#> 3rd Qu.: 94.71 3rd Qu.:118.98 3rd Qu.:134.28 3rd Qu.: -0.1001
#> Max. :141.59 Max. :184.63 Max. :212.94 Max. : 19.8433
#> NA's :3110 NA's :3129 NA's :3141 NA's :3135
#> Y22 Y23 Y24 Y25
#> Min. :-91.64 Min. :-98.82 Min. :-68.73 Min. :-67.74
#> 1st Qu.:-34.60 1st Qu.:-29.16 1st Qu.:-22.44 1st Qu.:-21.76
#> Median : 47.36 Median : 74.15 Median : 71.14 Median : 73.21
#> Mean : 16.04 Mean : 39.92 Mean : 55.14 Mean : 63.86
#> 3rd Qu.: 57.56 3rd Qu.: 99.94 3rd Qu.:128.49 3rd Qu.:145.23
#> Max. :114.18 Max. :151.39 Max. :195.89 Max. :231.72
#> NA's :3094 NA's :3098 NA's :3146 NA's :3106
#> Y31 Y32 Y33 Y34
#> Min. :-61.8386 Min. :-64.36 Min. :-77.52 Min. :-44.738
#> 1st Qu.:-14.2448 1st Qu.:-16.84 1st Qu.:-12.68 1st Qu.: -6.078
#> Median : -0.4389 Median : 59.39 Median : 80.92 Median : 79.149
#> Mean : -2.3836 Mean : 35.48 Mean : 54.80 Mean : 66.597
#> 3rd Qu.: 10.5156 3rd Qu.: 69.12 3rd Qu.:101.36 3rd Qu.:124.148
#> Max. : 33.9299 Max. :118.72 Max. :153.84 Max. :198.039
#> NA's :2067 NA's :2056 NA's :2013 NA's :2051
#> Y35 Y41 Y42 Y43
#> Min. :-43.026 Min. :-25.0276 Min. :-26.607 Min. :-21.134
#> 1st Qu.: -5.815 1st Qu.: -6.4226 1st Qu.: -7.992 1st Qu.: -5.286
#> Median : 80.878 Median : -0.0618 Median : 27.032 Median : 37.099
#> Mean : 73.133 Mean : -1.0265 Mean : 16.618 Mean : 25.381
#> 3rd Qu.:138.746 3rd Qu.: 4.7805 3rd Qu.: 32.320 3rd Qu.: 46.395
#> Max. :226.577 Max. : 14.2158 Max. : 59.684 Max. : 72.119
#> NA's :2054 NA's :2032 NA's :2022 NA's :2013
#> Y44 Y45 Y51 Y52
#> Min. :-17.799 Min. :-15.073 Min. :-19.8408 Min. :-20.575
#> 1st Qu.: -2.974 1st Qu.: -2.927 1st Qu.: -3.6429 1st Qu.: -5.056
#> Median : 35.892 Median : 37.450 Median : 1.8005 Median : 24.853
#> Mean : 30.326 Mean : 33.814 Mean : 0.9457 Mean : 16.000
#> 3rd Qu.: 56.243 3rd Qu.: 63.199 3rd Qu.: 5.8282 3rd Qu.: 29.464
#> Max. : 94.820 Max. :110.590 Max. : 14.8720 Max. : 52.764
#> NA's :2023 NA's :2086 NA's :2077 NA's :2034
#> Y53 Y54 Y55
#> Min. :-22.010 Min. :-16.174 Min. :-9.8664
#> 1st Qu.: -2.686 1st Qu.: -0.699 1st Qu.:-0.8639
#> Median : 33.748 Median : 32.594 Median :33.7227
#> Mean : 23.584 Mean : 27.660 Mean :30.4929
#> 3rd Qu.: 41.398 3rd Qu.: 49.971 3rd Qu.:55.6098
#> Max. : 65.703 Max. : 82.862 Max. :95.5910
#> NA's :1976 NA's :2047 NA's :2050