Descriptive statistics for multiple variables for all grouping variable levels

grouped_summary(
  data,
  grouping.vars,
  measures = NULL,
  measures.type = "numeric",
  topcount.long = FALSE,
  k = 2L,
  ...
)

Arguments

data	Dataframe from which variables need to be taken.
grouping.vars	A list of grouping variables. Please use unquoted arguments (i.e., use `x` and not `"x"`).
measures	List of variables for which summary needs to be computed. If not specified, all variables of type specified in the argument `measures.type` will be used to calculate summaries. Don't explicitly set `measures.type = NULL` in function call, which will produce an error because the function will try to find a column in a dataframe named "NULL".
measures.type	A character indicating whether summary for numeric ("numeric") or factor/character ("factor") variables is expected (Default: `measures.type = "numeric"`). This function can't be used for both numeric and factor variables simultaneously.
topcount.long	If `measures.type = "factor"`, you can get the top counts in long format for plotting purposes. (Default: `topcount.long = FALSE`).
k	Number of digits.
...	Currently ignored.

Value

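Dataframe with descriptive statistics for numeric variables (n, mean, sd, median, min, max). If `measures.type = "factor"`, the dataframe instead contains factor summaries (e.g., number of unique levels and top counts), or, if `topcount.long = TRUE`, the top counts in long format.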

Examples

# for reproducibility
set.seed(123)

# summarizing a range of numeric measures for each level of a grouping variable
groupedstats::grouped_summary(
  data = iris,
  grouping.vars = Species,
  measures = Sepal.Length:Petal.Width,
  measures.type = "numeric"
)
#> # A tibble: 12 x 16
#>    Species    skim_type skim_variable missing complete  mean    sd   min   p25
#>    <fct>      <chr>     <chr>           <int>    <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1 setosa     numeric   Sepal.Length        0        1 5.01  0.352   4.3  4.8 
#>  2 setosa     numeric   Sepal.Width         0        1 3.43  0.379   2.3  3.2 
#>  3 setosa     numeric   Petal.Length        0        1 1.46  0.174   1    1.4 
#>  4 setosa     numeric   Petal.Width         0        1 0.246 0.105   0.1  0.2 
#>  5 versicolor numeric   Sepal.Length        0        1 5.94  0.516   4.9  5.6 
#>  6 versicolor numeric   Sepal.Width         0        1 2.77  0.314   2    2.52
#>  7 versicolor numeric   Petal.Length        0        1 4.26  0.470   3    4   
#>  8 versicolor numeric   Petal.Width         0        1 1.33  0.198   1    1.2 
#>  9 virginica  numeric   Sepal.Length        0        1 6.59  0.636   4.9  6.22
#> 10 virginica  numeric   Sepal.Width         0        1 2.97  0.322   2.2  2.8 
#> 11 virginica  numeric   Petal.Length        0        1 5.55  0.552   4.5  5.1 
#> 12 virginica  numeric   Petal.Width         0        1 2.03  0.275   1.4  1.8 
#> # … with 7 more variables: median <dbl>, p75 <dbl>, max <dbl>, n <int>,
#> #   std.error <dbl>, mean.conf.low <dbl>, mean.conf.high <dbl>

# if no measures are chosen, all relevant columns will be summarized
groupedstats::grouped_summary(
  data = ggplot2::msleep,
  grouping.vars = vore,
  measures.type = "factor"
)
#> # A tibble: 20 x 9
#>    vore   skim_type skim_variable missing complete ordered n_unique top_counts  
#>    <fct>  <chr>     <chr>           <int>    <dbl> <lgl>      <int> <chr>       
#>  1 carni  factor    name                0    1     FALSE         19 Arc: 1, Bot…
#>  2 carni  factor    genus               0    1     FALSE         16 Pan: 3, Vul…
#>  3 carni  factor    order               0    1     FALSE          6 Car: 12, Ce…
#>  4 carni  factor    conservation        5    0.737 FALSE          6 lc: 5, vu: …
#>  5 herbi  factor    name                0    1     FALSE         32 Afr: 1, Arc…
#>  6 herbi  factor    genus               0    1     FALSE         29 Spe: 3, Equ…
#>  7 herbi  factor    order               0    1     FALSE          9 Rod: 16, Ar…
#>  8 herbi  factor    conservation        6    0.812 FALSE          6 lc: 10, dom…
#>  9 insec… factor    name                0    1     FALSE          5 Big: 1, Eas…
#> 10 insec… factor    genus               0    1     FALSE          5 Ept: 1, Myo…
#> 11 insec… factor    order               0    1     FALSE          4 Chi: 2, Cin…
#> 12 insec… factor    conservation        2    0.6   FALSE          2 lc: 2, en: …
#> 13 omni   factor    name                0    1     FALSE         20 Afr: 1, Afr…
#> 14 omni   factor    genus               0    1     FALSE         20 Aot: 1, Bla…
#> 15 omni   factor    order               0    1     FALSE          8 Pri: 10, So…
#> 16 omni   factor    conservation       11    0.45  FALSE          2 lc: 8, dom:…
#> 17 NA     factor    name                0    1     FALSE          7 Dee: 1, Des…
#> 18 NA     factor    genus               0    1     FALSE          7 Cal: 1, Par…
#> 19 NA     factor    order               0    1     FALSE          5 Rod: 3, Dip…
#> 20 NA     factor    conservation        5    0.286 FALSE          1 lc: 2, cd: …
#> # … with 1 more variable: n <int>

# for factors, you can also convert the dataframe to a long format with counts
groupedstats::grouped_summary(
  data = ggplot2::msleep,
  grouping.vars = c(vore),
  measures = c(genus:order),
  measures.type = "factor",
  topcount.long = TRUE
)
#> # A tibble: 40 x 3
#>    vore  factor.level count
#>    <fct> <chr>        <int>
#>  1 carni Pan              3
#>  2 carni Vul              2
#>  3 carni Aci              1
#>  4 carni Cal              1
#>  5 carni Car             12
#>  6 carni Cet              3
#>  7 carni Cin              1
#>  8 carni Did              1
#>  9 herbi Spe              3
#> 10 herbi Equ              2
#> # … with 30 more rows