Generate summary statistics
SummaryStatsBy.Rd
Splits the data by two variables, computing relevant statistics for each variable.
Arguments
- data
The data.frame containing the data
- by1
character string of splitting variable 1
- by2
character string of splitting variable 2
- var.names
character vector of variables to compute statistics for
- stats
statistics to compute for continuous variables
- digits
number of digits to round to
- format
format to return the table in: one of "raw", "pandoc" (for Word and PDF), "html", or "long" (a format for graphing and data manipulation using raw values).
Details
The data is split by two variables, by1 and by2, and
statistics are computed for continuous variables. Statistics currently
supported include mean, sd, median, IQR, range, and the number of
missing cases. For factor variables, the counts, column and row percentages
are shown for each of the variable levels.
Note that marginal statistics are also shown for by1, so the order in
which you split data matters.
There are four print options for the output: raw gives the output as a
character matrix, pandoc gives a Pandoc-friendly output for Word and
PDF reports, html gives HTML-supported output, and long is a
tidy version of raw.
Examples
mtcars$vs <- as.factor(mtcars$vs); mtcars$am <- as.factor(mtcars$am)
SummaryStatsBy(mtcars, by1 = "cyl", by2 = "gear", var.names = c("mpg", "vs",
"qsec", "am"))
#> cyl=4, gear=3 cyl=4, gear=4 cyl=4, gear=5
#> **mpg** "" "" ""
#> mean "21.5 ± NA" "26.925 ± 4.807" "28.2 ± 3.111"
#> median "21.5" "25.85" "28.2"
#> IQR "0" "8.1" "2.2"
#> range "21.5" "21.4-33.9" "26-30.4"
#> missing "0" "0" "0"
#> **vs** "" "" ""
#> 0 "0 (0%, 0%)" "0 (0%, 0%)" "1 (5.56%, 50%)"
#> 1 "1 (7.14%, 100%)" "8 (57.14%, 100%)" "1 (7.14%, 50%)"
#> **qsec** "" "" ""
#> mean "20.01 ± NA" "19.613 ± 1.454" "16.8 ± 0.141"
#> median "20.01" "19.185" "16.8"
#> IQR "0" "1.317" "0.1"
#> range "20.01" "18.52-22.9" "16.7-16.9"
#> missing "0" "0" "0"
#> **am** "" "" ""
#> 0 "1 (5.26%, 100%)" "2 (10.53%, 25%)" "0 (0%, 0%)"
#> 1 "0 (0%, 0%)" "6 (46.15%, 75%)" "2 (15.38%, 100%)"
#> cyl=4 cyl=6, gear=3 cyl=6, gear=4
#> **mpg** "" "" ""
#> mean "26.664 ± 4.51" "19.75 ± 2.333" "19.75 ± 1.552"
#> median "26" "19.75" "20.1"
#> IQR "7.6" "1.65" "2.15"
#> range "21.4-33.9" "18.1-21.4" "17.8-21"
#> missing "0" "0" "0"
#> **vs** "" "" ""
#> 0 "1 (9.09%)" "0 (0%, 0%)" "2 (11.11%, 50%)"
#> 1 "10 (90.91%)" "2 (14.29%, 100%)" "2 (14.29%, 50%)"
#> **qsec** "" "" ""
#> mean "19.137 ± 1.682" "19.83 ± 0.552" "17.67 ± 1.125"
#> median "18.9" "19.83" "17.66"
#> IQR "1.39" "0.39" "1.57"
#> range "16.7-22.9" "19.44-20.22" "16.46-18.9"
#> missing "0" "0" "0"
#> **am** "" "" ""
#> 0 "3 (27.27%)" "2 (10.53%, 100%)" "2 (10.53%, 50%)"
#> 1 "8 (72.73%)" "0 (0%, 0%)" "2 (15.38%, 50%)"
#> cyl=6, gear=5 cyl=6 cyl=8, gear=3
#> **mpg** "" "" ""
#> mean "19.7 ± NA" "19.743 ± 1.454" "15.05 ± 2.774"
#> median "19.7" "19.7" "15.2"
#> IQR "0" "2.35" "2.575"
#> range "19.7" "17.8-21.4" "10.4-19.2"
#> missing "0" "0" "0"
#> **vs** "" "" ""
#> 0 "1 (5.56%, 100%)" "3 (42.86%)" "12 (66.67%, 100%)"
#> 1 "0 (0%, 0%)" "4 (57.14%)" "0 (0%, 0%)"
#> **qsec** "" "" ""
#> mean "15.5 ± NA" "17.977 ± 1.707" "17.143 ± 0.802"
#> median "15.5" "18.3" "17.35"
#> IQR "0" "2.43" "0.672"
#> range "15.5" "15.5-20.22" "15.41-18"
#> missing "0" "0" "0"
#> **am** "" "" ""
#> 0 "0 (0%, 0%)" "4 (57.14%)" "12 (63.16%, 100%)"
#> 1 "1 (7.69%, 100%)" "3 (42.86%)" "0 (0%, 0%)"
#> cyl=8, gear=5 cyl=8
#> **mpg** "" ""
#> mean "15.4 ± 0.566" "15.1 ± 2.56"
#> median "15.4" "15.2"
#> IQR "0.4" "1.85"
#> range "15-15.8" "10.4-19.2"
#> missing "0" "0"
#> **vs** "" ""
#> 0 "2 (11.11%, 100%)" "14 (100%)"
#> 1 "0 (0%, 0%)" "0 (0%)"
#> **qsec** "" ""
#> mean "14.55 ± 0.071" "16.772 ± 1.196"
#> median "14.55" "17.175"
#> IQR "0.05" "1.457"
#> range "14.5-14.6" "14.5-18"
#> missing "0" "0"
#> **am** "" ""
#> 0 "0 (0%, 0%)" "12 (85.71%)"
#> 1 "2 (15.38%, 100%)" "2 (14.29%)"
SummaryStatsBy(mtcars, by1 = "cyl", by2 = "gear", var.names = c("vs",
"qsec"))
#> cyl=4, gear=3 cyl=4, gear=4 cyl=4, gear=5
#> **vs** "" "" ""
#> 0 "0 (0%, 0%)" "0 (0%, 0%)" "1 (5.56%, 50%)"
#> 1 "1 (7.14%, 100%)" "8 (57.14%, 100%)" "1 (7.14%, 50%)"
#> **qsec** "" "" ""
#> mean "20.01 ± NA" "19.613 ± 1.454" "16.8 ± 0.141"
#> median "20.01" "19.185" "16.8"
#> IQR "0" "1.317" "0.1"
#> range "20.01" "18.52-22.9" "16.7-16.9"
#> missing "0" "0" "0"
#> cyl=4 cyl=6, gear=3 cyl=6, gear=4
#> **vs** "" "" ""
#> 0 "1 (9.09%)" "0 (0%, 0%)" "2 (11.11%, 50%)"
#> 1 "10 (90.91%)" "2 (14.29%, 100%)" "2 (14.29%, 50%)"
#> **qsec** "" "" ""
#> mean "19.137 ± 1.682" "19.83 ± 0.552" "17.67 ± 1.125"
#> median "18.9" "19.83" "17.66"
#> IQR "1.39" "0.39" "1.57"
#> range "16.7-22.9" "19.44-20.22" "16.46-18.9"
#> missing "0" "0" "0"
#> cyl=6, gear=5 cyl=6 cyl=8, gear=3
#> **vs** "" "" ""
#> 0 "1 (5.56%, 100%)" "3 (42.86%)" "12 (66.67%, 100%)"
#> 1 "0 (0%, 0%)" "4 (57.14%)" "0 (0%, 0%)"
#> **qsec** "" "" ""
#> mean "15.5 ± NA" "17.977 ± 1.707" "17.143 ± 0.802"
#> median "15.5" "18.3" "17.35"
#> IQR "0" "2.43" "0.672"
#> range "15.5" "15.5-20.22" "15.41-18"
#> missing "0" "0" "0"
#> cyl=8, gear=5 cyl=8
#> **vs** "" ""
#> 0 "2 (11.11%, 100%)" "14 (100%)"
#> 1 "0 (0%, 0%)" "0 (0%)"
#> **qsec** "" ""
#> mean "14.55 ± 0.071" "16.772 ± 1.196"
#> median "14.55" "17.175"
#> IQR "0.05" "1.457"
#> range "14.5-14.6" "14.5-18"
#> missing "0" "0"
SummaryStatsBy(mtcars, by1 = "cyl", by2 = "gear", var.names = c("mpg"))
#> cyl=4, gear=3 cyl=4, gear=4 cyl=4, gear=5
#> **mpg** "" "" ""
#> mean "21.5 ± NA" "26.925 ± 4.807" "28.2 ± 3.111"
#> median "21.5" "25.85" "28.2"
#> IQR "0" "8.1" "2.2"
#> range "21.5" "21.4-33.9" "26-30.4"
#> missing "0" "0" "0"
#> cyl=4 cyl=6, gear=3 cyl=6, gear=4
#> **mpg** "" "" ""
#> mean "26.664 ± 4.51" "19.75 ± 2.333" "19.75 ± 1.552"
#> median "26" "19.75" "20.1"
#> IQR "7.6" "1.65" "2.15"
#> range "21.4-33.9" "18.1-21.4" "17.8-21"
#> missing "0" "0" "0"
#> cyl=6, gear=5 cyl=6 cyl=8, gear=3
#> **mpg** "" "" ""
#> mean "19.7 ± NA" "19.743 ± 1.454" "15.05 ± 2.774"
#> median "19.7" "19.7" "15.2"
#> IQR "0" "2.35" "2.575"
#> range "19.7" "17.8-21.4" "10.4-19.2"
#> missing "0" "0" "0"
#> cyl=8, gear=5 cyl=8
#> **mpg** "" ""
#> mean "15.4 ± 0.566" "15.1 ± 2.56"
#> median "15.4" "15.2"
#> IQR "0.4" "1.85"
#> range "15-15.8" "10.4-19.2"
#> missing "0" "0"