Generate summary statistics
SummaryStatsBy.Rd
Splits the data by two variables, computing relevant statistics for each variable.
Arguments
- data
The `data.frame` containing the data
- by1
character string of splitting variable 1
- by2
character string of splitting variable 2
- var.names
character vector of variables to compute statistics for
- stats
statistics to compute for continuous variables
- digits
number of digits to round to
- format
format to return the table in: one of "raw", "pandoc" (for Word and PDF), "html", or "long" (a format using raw values, intended for graphing and data manipulation).
Details
The `data` is split by two variables, `by1` and `by2`, and
statistics are computed for continuous variables. Statistics currently
supported include `mean`, `sd`, `median`, `IQR`, `range`, and the number of
missing cases. For factor variables, the counts and the column and row
percentages are shown for each of the variable levels.
Note that marginal statistics are also shown for `by1`, so the order in
which you split `data` matters.
There are four print options for the output: `raw` gives the output as a
character matrix, `pandoc` gives a Pandoc-friendly output for Word and
PDF reports, `html` gives HTML-supported output, and `long` is a
tidy version of `raw`.
Examples
mtcars$vs <- as.factor(mtcars$vs); mtcars$am <- as.factor(mtcars$am)
SummaryStatsBy(mtcars, by1 = "cyl", by2 = "gear", var.names = c("mpg", "vs",
"qsec", "am"))
#> cyl=4, gear=3 cyl=4, gear=4 cyl=4, gear=5
#> **mpg** "" "" ""
#> mean "21.5 ± NA" "26.925 ± 4.807" "28.2 ± 3.111"
#> median "21.5" "25.85" "28.2"
#> IQR "0" "8.1" "2.2"
#> range "21.5" "21.4-33.9" "26-30.4"
#> missing "0" "0" "0"
#> **vs** "" "" ""
#> 0 "0 (0%, 0%)" "0 (0%, 0%)" "1 (5.56%, 50%)"
#> 1 "1 (7.14%, 100%)" "8 (57.14%, 100%)" "1 (7.14%, 50%)"
#> **qsec** "" "" ""
#> mean "20.01 ± NA" "19.613 ± 1.454" "16.8 ± 0.141"
#> median "20.01" "19.185" "16.8"
#> IQR "0" "1.317" "0.1"
#> range "20.01" "18.52-22.9" "16.7-16.9"
#> missing "0" "0" "0"
#> **am** "" "" ""
#> 0 "1 (5.26%, 100%)" "2 (10.53%, 25%)" "0 (0%, 0%)"
#> 1 "0 (0%, 0%)" "6 (46.15%, 75%)" "2 (15.38%, 100%)"
#> cyl=4 cyl=6, gear=3 cyl=6, gear=4
#> **mpg** "" "" ""
#> mean "26.664 ± 4.51" "19.75 ± 2.333" "19.75 ± 1.552"
#> median "26" "19.75" "20.1"
#> IQR "7.6" "1.65" "2.15"
#> range "21.4-33.9" "18.1-21.4" "17.8-21"
#> missing "0" "0" "0"
#> **vs** "" "" ""
#> 0 "1 (9.09%)" "0 (0%, 0%)" "2 (11.11%, 50%)"
#> 1 "10 (90.91%)" "2 (14.29%, 100%)" "2 (14.29%, 50%)"
#> **qsec** "" "" ""
#> mean "19.137 ± 1.682" "19.83 ± 0.552" "17.67 ± 1.125"
#> median "18.9" "19.83" "17.66"
#> IQR "1.39" "0.39" "1.57"
#> range "16.7-22.9" "19.44-20.22" "16.46-18.9"
#> missing "0" "0" "0"
#> **am** "" "" ""
#> 0 "3 (27.27%)" "2 (10.53%, 100%)" "2 (10.53%, 50%)"
#> 1 "8 (72.73%)" "0 (0%, 0%)" "2 (15.38%, 50%)"
#> cyl=6, gear=5 cyl=6 cyl=8, gear=3
#> **mpg** "" "" ""
#> mean "19.7 ± NA" "19.743 ± 1.454" "15.05 ± 2.774"
#> median "19.7" "19.7" "15.2"
#> IQR "0" "2.35" "2.575"
#> range "19.7" "17.8-21.4" "10.4-19.2"
#> missing "0" "0" "0"
#> **vs** "" "" ""
#> 0 "1 (5.56%, 100%)" "3 (42.86%)" "12 (66.67%, 100%)"
#> 1 "0 (0%, 0%)" "4 (57.14%)" "0 (0%, 0%)"
#> **qsec** "" "" ""
#> mean "15.5 ± NA" "17.977 ± 1.707" "17.143 ± 0.802"
#> median "15.5" "18.3" "17.35"
#> IQR "0" "2.43" "0.672"
#> range "15.5" "15.5-20.22" "15.41-18"
#> missing "0" "0" "0"
#> **am** "" "" ""
#> 0 "0 (0%, 0%)" "4 (57.14%)" "12 (63.16%, 100%)"
#> 1 "1 (7.69%, 100%)" "3 (42.86%)" "0 (0%, 0%)"
#> cyl=8, gear=5 cyl=8
#> **mpg** "" ""
#> mean "15.4 ± 0.566" "15.1 ± 2.56"
#> median "15.4" "15.2"
#> IQR "0.4" "1.85"
#> range "15-15.8" "10.4-19.2"
#> missing "0" "0"
#> **vs** "" ""
#> 0 "2 (11.11%, 100%)" "14 (100%)"
#> 1 "0 (0%, 0%)" "0 (0%)"
#> **qsec** "" ""
#> mean "14.55 ± 0.071" "16.772 ± 1.196"
#> median "14.55" "17.175"
#> IQR "0.05" "1.457"
#> range "14.5-14.6" "14.5-18"
#> missing "0" "0"
#> **am** "" ""
#> 0 "0 (0%, 0%)" "12 (85.71%)"
#> 1 "2 (15.38%, 100%)" "2 (14.29%)"
SummaryStatsBy(mtcars, by1 = "cyl", by2 = "gear", var.names = c("vs",
"qsec"))
#> cyl=4, gear=3 cyl=4, gear=4 cyl=4, gear=5
#> **vs** "" "" ""
#> 0 "0 (0%, 0%)" "0 (0%, 0%)" "1 (5.56%, 50%)"
#> 1 "1 (7.14%, 100%)" "8 (57.14%, 100%)" "1 (7.14%, 50%)"
#> **qsec** "" "" ""
#> mean "20.01 ± NA" "19.613 ± 1.454" "16.8 ± 0.141"
#> median "20.01" "19.185" "16.8"
#> IQR "0" "1.317" "0.1"
#> range "20.01" "18.52-22.9" "16.7-16.9"
#> missing "0" "0" "0"
#> cyl=4 cyl=6, gear=3 cyl=6, gear=4
#> **vs** "" "" ""
#> 0 "1 (9.09%)" "0 (0%, 0%)" "2 (11.11%, 50%)"
#> 1 "10 (90.91%)" "2 (14.29%, 100%)" "2 (14.29%, 50%)"
#> **qsec** "" "" ""
#> mean "19.137 ± 1.682" "19.83 ± 0.552" "17.67 ± 1.125"
#> median "18.9" "19.83" "17.66"
#> IQR "1.39" "0.39" "1.57"
#> range "16.7-22.9" "19.44-20.22" "16.46-18.9"
#> missing "0" "0" "0"
#> cyl=6, gear=5 cyl=6 cyl=8, gear=3
#> **vs** "" "" ""
#> 0 "1 (5.56%, 100%)" "3 (42.86%)" "12 (66.67%, 100%)"
#> 1 "0 (0%, 0%)" "4 (57.14%)" "0 (0%, 0%)"
#> **qsec** "" "" ""
#> mean "15.5 ± NA" "17.977 ± 1.707" "17.143 ± 0.802"
#> median "15.5" "18.3" "17.35"
#> IQR "0" "2.43" "0.672"
#> range "15.5" "15.5-20.22" "15.41-18"
#> missing "0" "0" "0"
#> cyl=8, gear=5 cyl=8
#> **vs** "" ""
#> 0 "2 (11.11%, 100%)" "14 (100%)"
#> 1 "0 (0%, 0%)" "0 (0%)"
#> **qsec** "" ""
#> mean "14.55 ± 0.071" "16.772 ± 1.196"
#> median "14.55" "17.175"
#> IQR "0.05" "1.457"
#> range "14.5-14.6" "14.5-18"
#> missing "0" "0"
SummaryStatsBy(mtcars, by1 = "cyl", by2 = "gear", var.names = c("mpg"))
#> cyl=4, gear=3 cyl=4, gear=4 cyl=4, gear=5
#> **mpg** "" "" ""
#> mean "21.5 ± NA" "26.925 ± 4.807" "28.2 ± 3.111"
#> median "21.5" "25.85" "28.2"
#> IQR "0" "8.1" "2.2"
#> range "21.5" "21.4-33.9" "26-30.4"
#> missing "0" "0" "0"
#> cyl=4 cyl=6, gear=3 cyl=6, gear=4
#> **mpg** "" "" ""
#> mean "26.664 ± 4.51" "19.75 ± 2.333" "19.75 ± 1.552"
#> median "26" "19.75" "20.1"
#> IQR "7.6" "1.65" "2.15"
#> range "21.4-33.9" "18.1-21.4" "17.8-21"
#> missing "0" "0" "0"
#> cyl=6, gear=5 cyl=6 cyl=8, gear=3
#> **mpg** "" "" ""
#> mean "19.7 ± NA" "19.743 ± 1.454" "15.05 ± 2.774"
#> median "19.7" "19.7" "15.2"
#> IQR "0" "2.35" "2.575"
#> range "19.7" "17.8-21.4" "10.4-19.2"
#> missing "0" "0" "0"
#> cyl=8, gear=5 cyl=8
#> **mpg** "" ""
#> mean "15.4 ± 0.566" "15.1 ± 2.56"
#> median "15.4" "15.2"
#> IQR "0.4" "1.85"
#> range "15-15.8" "10.4-19.2"
#> missing "0" "0"