Skip to contents

This vignette can be cited as:


To cite package 'statsExpressions' in publications use:

  Patil, I., (2021). statsExpressions: R Package for Tidy Dataframes
  and Expressions with Statistical Details. Journal of Open Source
  Software, 6(61), 3236, https://doi.org/10.21105/joss.03236

A BibTeX entry for LaTeX users is

  @Article{,
    doi = {10.21105/joss.03236},
    url = {https://doi.org/10.21105/joss.03236},
    year = {2021},
    publisher = {{The Open Journal}},
    volume = {6},
    number = {61},
    pages = {3236},
    author = {Indrajeet Patil},
    title = {{statsExpressions: {R} Package for Tidy Dataframes and Expressions with Statistical Details}},
    journal = {{Journal of Open Source Software}},
  }

One-sample tests

# setup
library(statsExpressions)

# ----------------------- parametric ---------------------------------------

set.seed(123)
one_sample_test(
  data       = ggplot2::msleep,
  x          = brainwt,
  test.value = 0.275,
  type       = "parametric"
)
#> # A tibble: 1 × 15
#>      mu statistic df.error p.value method            alternative effectsize
#>   <dbl>     <dbl>    <dbl>   <dbl> <chr>             <chr>       <chr>     
#> 1 0.275    0.0504       55   0.960 One Sample t-test two.sided   Hedges' g 
#>   estimate conf.level conf.low conf.high conf.method conf.distribution n.obs
#>      <dbl>      <dbl>    <dbl>     <dbl> <chr>       <chr>             <int>
#> 1  0.00665       0.95   -0.252     0.265 ncp         t                    56
#>   expression
#>   <list>    
#> 1 <language>

# ----------------------- non-parametric -----------------------------------

set.seed(123)
one_sample_test(
  data       = ggplot2::msleep,
  x          = brainwt,
  test.value = 0.275,
  type       = "nonparametric"
)
#> # A tibble: 1 × 12
#>   statistic    p.value method                    alternative effectsize       
#>       <dbl>      <dbl> <chr>                     <chr>       <chr>            
#> 1       248 0.00000738 Wilcoxon signed rank test two.sided   r (rank biserial)
#>   estimate conf.level conf.low conf.high conf.method n.obs expression
#>      <dbl>      <dbl>    <dbl>     <dbl> <chr>       <int> <list>    
#> 1   -0.689       0.95   -0.817    -0.497 normal         56 <language>

# ----------------------- robust --------------------------------------------

set.seed(123)
one_sample_test(
  data       = ggplot2::msleep,
  x          = brainwt,
  test.value = 0.275,
  type       = "robust"
)
#> # A tibble: 1 × 10
#>   statistic p.value n.obs method                                 effectsize  
#>       <dbl>   <dbl> <int> <chr>                                  <chr>       
#> 1     -14.7       0    56 Bootstrap-t method for one-sample test Trimmed mean
#>   estimate conf.level conf.low conf.high expression
#>      <dbl>      <dbl>    <dbl>     <dbl> <list>    
#> 1   0.0390       0.95  -0.0132    0.0911 <language>

# ----------------------- Bayesian ---------------------------------------

set.seed(123)
one_sample_test(
  data       = ggplot2::msleep,
  x          = brainwt,
  test.value = 0.275,
  type       = "bayes",
  bf.prior   = 0.8
)
#> # A tibble: 1 × 17
#>   term       effectsize      estimate conf.level conf.low conf.high    pd
#>   <chr>      <chr>              <dbl>      <dbl>    <dbl>     <dbl> <dbl>
#> 1 Difference Bayesian t-test -0.00952       0.95   -0.261     0.244 0.532
#>   rope.percentage prior.distribution prior.location prior.scale  bf10
#>             <dbl> <chr>                       <dbl>       <dbl> <dbl>
#> 1            0.58 cauchy                          0         0.8 0.130
#>   method          conf.method log_e_bf10 n.obs expression
#>   <chr>           <chr>            <dbl> <int> <list>    
#> 1 Bayesian t-test ETI              -2.04    56 <language>

Two-sample tests

within-subjects design

# setup
library(statsExpressions)

# data
df <- dplyr::filter(bugs_long, condition %in% c("LDLF", "LDHF"))

# ----------------------- parametric ---------------------------------------

set.seed(123)
two_sample_test(
  data       = df,
  x          = condition,
  y          = desire,
  paired     = TRUE,
  subject.id = subject,
  type       = "p"
)
#> # A tibble: 1 × 16
#>   term   group     statistic df.error       p.value method        alternative
#>   <chr>  <chr>         <dbl>    <dbl>         <dbl> <chr>         <chr>      
#> 1 desire condition      6.65       90 0.00000000222 Paired t-test two.sided  
#>   effectsize estimate conf.level conf.low conf.high conf.method
#>   <chr>         <dbl>      <dbl>    <dbl>     <dbl> <chr>      
#> 1 Hedges' g     0.691       0.95    0.462     0.917 ncp        
#>   conf.distribution n.obs expression
#>   <chr>             <int> <list>    
#> 1 t                    91 <language>

# ----------------------- non-parametric -----------------------------------

set.seed(123)
two_sample_test(
  data       = df,
  x          = condition,
  y          = desire,
  paired     = TRUE,
  subject.id = subject,
  type       = "np"
)
#> # A tibble: 1 × 14
#>   parameter1 parameter2 statistic      p.value method                   
#>   <chr>      <chr>          <dbl>        <dbl> <chr>                    
#> 1 desire     condition      2250. 0.0000000241 Wilcoxon signed rank test
#>   alternative effectsize        estimate conf.level conf.low conf.high
#>   <chr>       <chr>                <dbl>      <dbl>    <dbl>     <dbl>
#> 1 two.sided   r (rank biserial)    0.761       0.95    0.642     0.844
#>   conf.method n.obs expression
#>   <chr>       <int> <list>    
#> 1 normal         91 <language>

# ----------------------- robust --------------------------------------------

set.seed(123)
two_sample_test(
  data       = df,
  x          = condition,
  y          = desire,
  paired     = TRUE,
  subject.id = subject,
  type       = "r"
)
#> # A tibble: 1 × 15
#>   statistic df.error      p.value
#>       <dbl>    <dbl>        <dbl>
#> 1      6.46       54 0.0000000313
#>   method                                            
#>   <chr>                                             
#> 1 Yuen's test on trimmed means for dependent samples
#>   effectsize                                              estimate conf.level
#>   <chr>                                                      <dbl>      <dbl>
#> 1 Algina-Keselman-Penfield robust standardized difference    0.533       0.95
#>   conf.low conf.high    mu small medium large n.obs expression
#>      <dbl>     <dbl> <dbl> <dbl>  <dbl> <dbl> <int> <list>    
#> 1    0.369     0.707     0   0.1    0.3   0.5    91 <language>

# ----------------------- Bayesian ---------------------------------------

set.seed(123)
two_sample_test(
  data       = df,
  x          = condition,
  y          = desire,
  paired     = TRUE,
  subject.id = subject,
  type       = "bayes"
)
#> # A tibble: 1 × 17
#>   term       effectsize      estimate conf.level conf.low conf.high    pd
#>   <chr>      <chr>              <dbl>      <dbl>    <dbl>     <dbl> <dbl>
#> 1 Difference Bayesian t-test     1.62       0.95     1.13      2.12     1
#>   rope.percentage prior.distribution prior.location prior.scale     bf10
#>             <dbl> <chr>                       <dbl>       <dbl>    <dbl>
#> 1               0 cauchy                          0       0.707 4762370.
#>   method          conf.method log_e_bf10 n.obs expression
#>   <chr>           <chr>            <dbl> <int> <list>    
#> 1 Bayesian t-test ETI               15.4    91 <language>

between-subjects design

# setup
set.seed(123)
library(statsExpressions)

# ----------------------- parametric ---------------------------------------

# unequal variance
set.seed(123)
two_sample_test(
  data      = ToothGrowth,
  x         = supp,
  y         = len,
  type      = "p"
)
#> # A tibble: 1 × 18
#>   term  group mean.group1 mean.group2 statistic df.error p.value
#>   <chr> <chr>       <dbl>       <dbl>     <dbl>    <dbl>   <dbl>
#> 1 len   supp         20.7        17.0      1.92     55.3  0.0606
#>   method                  alternative effectsize estimate conf.level conf.low
#>   <chr>                   <chr>       <chr>         <dbl>      <dbl>    <dbl>
#> 1 Welch Two Sample t-test two.sided   Hedges' g     0.488       0.95  -0.0217
#>   conf.high conf.method conf.distribution n.obs expression
#>       <dbl> <chr>       <chr>             <int> <list>    
#> 1     0.993 ncp         t                    60 <language>

# equal variance
set.seed(123)
two_sample_test(
  data      = ToothGrowth,
  x         = supp,
  y         = len,
  var.equal = TRUE,
  type      = "p"
)
#> # A tibble: 1 × 18
#>   term  group mean.group1 mean.group2 statistic df.error p.value
#>   <chr> <chr>       <dbl>       <dbl>     <dbl>    <dbl>   <dbl>
#> 1 len   supp         20.7        17.0      1.92       58  0.0604
#>   method            alternative effectsize estimate conf.level conf.low
#>   <chr>             <chr>       <chr>         <dbl>      <dbl>    <dbl>
#> 1 Two Sample t-test two.sided   Hedges' g     0.488       0.95  -0.0217
#>   conf.high conf.method conf.distribution n.obs expression
#>       <dbl> <chr>       <chr>             <int> <list>    
#> 1     0.993 ncp         t                    60 <language>

# ----------------------- non-parametric -----------------------------------

set.seed(123)
two_sample_test(
  data      = ToothGrowth,
  x         = supp,
  y         = len,
  type      = "np"
)
#> # A tibble: 1 × 14
#>   parameter1 parameter2 statistic p.value method                 alternative
#>   <chr>      <chr>          <dbl>   <dbl> <chr>                  <chr>      
#> 1 len        supp            576.  0.0645 Wilcoxon rank sum test two.sided  
#>   effectsize        estimate conf.level conf.low conf.high conf.method n.obs
#>   <chr>                <dbl>      <dbl>    <dbl>     <dbl> <chr>       <int>
#> 1 r (rank biserial)    0.279       0.95 -0.00812     0.523 normal         60
#>   expression
#>   <list>    
#> 1 <language>

# ----------------------- robust --------------------------------------------

set.seed(123)
two_sample_test(
  data      = ToothGrowth,
  x         = supp,
  y         = len,
  type      = "r"
)
#> # A tibble: 1 × 11
#>   statistic df.error p.value
#>       <dbl>    <dbl>   <dbl>
#> 1      2.29     33.5  0.0286
#>   method                                              
#>   <chr>                                               
#> 1 Yuen's test on trimmed means for independent samples
#>   effectsize                                              estimate conf.level
#>   <chr>                                                      <dbl>      <dbl>
#> 1 Algina-Keselman-Penfield robust standardized difference    0.683       0.95
#>   conf.low conf.high n.obs expression
#>      <dbl>     <dbl> <int> <list>    
#> 1 -0.00736      2.36    60 <language>

# ----------------------- Bayesian ---------------------------------------

set.seed(123)
two_sample_test(
  data      = ToothGrowth,
  x         = supp,
  y         = len,
  type      = "bayes"
)
#> # A tibble: 1 × 17
#>   term       effectsize      estimate conf.level conf.low conf.high    pd
#>   <chr>      <chr>              <dbl>      <dbl>    <dbl>     <dbl> <dbl>
#> 1 Difference Bayesian t-test     3.16       0.95   -0.410      6.81 0.960
#>   rope.percentage prior.distribution prior.location prior.scale  bf10
#>             <dbl> <chr>                       <dbl>       <dbl> <dbl>
#> 1          0.0763 cauchy                          0       0.707  1.20
#>   method          conf.method log_e_bf10 n.obs expression
#>   <chr>           <chr>            <dbl> <int> <list>    
#> 1 Bayesian t-test ETI              0.181    60 <language>

One-way ANOVAs

within-subjects design

# setup
library(statsExpressions)

# ----------------------- parametric ---------------------------------------

if (require("afex", quietly = TRUE)) {
  set.seed(123)
  oneway_anova(
    data       = bugs_long,
    x          = condition,
    y          = desire,
    paired     = TRUE,
    subject.id = subject,
    type       = "p"
  )
}
#> # A tibble: 1 × 18
#>   term      sumsq sum.squares.error    df df.error meansq statistic  p.value
#>   <chr>     <dbl>             <dbl> <dbl>    <dbl>  <dbl>     <dbl>    <dbl>
#> 1 condition  233.              984.  2.63     229.   4.30      20.6 8.27e-11
#>   method                                              effectsize       estimate
#>   <chr>                                               <chr>               <dbl>
#> 1 ANOVA estimation for factorial designs using 'afex' Omega2 (partial)   0.0783
#>   conf.level conf.low conf.high conf.method conf.distribution n.obs expression
#>        <dbl>    <dbl>     <dbl> <chr>       <chr>             <int> <list>    
#> 1       0.95   0.0280         1 ncp         F                    88 <language>

# ----------------------- non-parametric -----------------------------------

set.seed(123)
oneway_anova(
  data       = bugs_long,
  x          = condition,
  y          = desire,
  paired     = TRUE,
  subject.id = subject,
  type       = "np"
)
#> # A tibble: 1 × 15
#>   parameter1 parameter2 statistic df.error  p.value method                
#>   <chr>      <chr>          <dbl>    <dbl>    <dbl> <chr>                 
#> 1 desire     condition       55.8        3 4.56e-12 Friedman rank sum test
#>   effectsize  estimate conf.level conf.low conf.high conf.method         
#>   <chr>          <dbl>      <dbl>    <dbl>     <dbl> <chr>               
#> 1 Kendall's W    0.211       0.95    0.148         1 percentile bootstrap
#>   conf.iterations n.obs expression
#>             <int> <int> <list>    
#> 1             100    88 <language>

# ----------------------- robust --------------------------------------------

set.seed(123)
oneway_anova(
  data       = bugs_long,
  x          = condition,
  y          = desire,
  paired     = TRUE,
  subject.id = subject,
  type       = "r"
)
#> # A tibble: 1 × 12
#>   statistic    df df.error  p.value
#>       <dbl> <dbl>    <dbl>    <dbl>
#> 1      21.0  2.73     145. 1.15e-10
#>   method                                                             
#>   <chr>                                                              
#> 1 A heteroscedastic one-way repeated measures ANOVA for trimmed means
#>   effectsize                                                      estimate
#>   <chr>                                                              <dbl>
#> 1 Algina-Keselman-Penfield robust standardized difference average    0.664
#>   conf.level conf.low conf.high n.obs expression
#>        <dbl>    <dbl>     <dbl> <int> <list>    
#> 1       0.95    0.466     0.971    88 <language>

# ----------------------- Bayesian ---------------------------------------

set.seed(123)
oneway_anova(
  data       = bugs_long,
  x          = condition,
  y          = desire,
  paired     = TRUE,
  subject.id = subject,
  type       = "bayes"
)
#> # A tibble: 8 × 20
#>   term              pd rope.percentage prior.distribution prior.location
#>   <chr>          <dbl>           <dbl> <chr>                       <dbl>
#> 1 mu             1               0     cauchy                          0
#> 2 condition-HDHF 1               0     cauchy                          0
#> 3 condition-HDLF 0.862           0.716 cauchy                          0
#> 4 condition-LDHF 0.995           0.142 cauchy                          0
#> 5 condition-LDLF 1               0     cauchy                          0
#> 6 sig2           1               0     cauchy                          0
#> 7 g_condition    1               0.376 cauchy                          0
#> 8 g_.rowid       1               0     cauchy                          0
#>   prior.scale effect        bf10 method                          log_e_bf10
#>         <dbl> <chr>        <dbl> <chr>                                <dbl>
#> 1       0.707 fixed  1372773377. Bayes factors for linear models       21.0
#> 2       0.707 fixed  1372773377. Bayes factors for linear models       21.0
#> 3       0.707 fixed  1372773377. Bayes factors for linear models       21.0
#> 4       0.707 fixed  1372773377. Bayes factors for linear models       21.0
#> 5       0.707 fixed  1372773377. Bayes factors for linear models       21.0
#> 6       1     fixed  1372773377. Bayes factors for linear models       21.0
#> 7       1     fixed  1372773377. Bayes factors for linear models       21.0
#> 8       1     fixed  1372773377. Bayes factors for linear models       21.0
#>   effectsize         estimate std.dev conf.level conf.low conf.high conf.method
#>   <chr>                 <dbl>   <dbl>      <dbl>    <dbl>     <dbl> <chr>      
#> 1 Bayesian R-squared    0.529  0.0330       0.95    0.460     0.586 HDI        
#> 2 Bayesian R-squared    0.529  0.0330       0.95    0.460     0.586 HDI        
#> 3 Bayesian R-squared    0.529  0.0330       0.95    0.460     0.586 HDI        
#> 4 Bayesian R-squared    0.529  0.0330       0.95    0.460     0.586 HDI        
#> 5 Bayesian R-squared    0.529  0.0330       0.95    0.460     0.586 HDI        
#> 6 Bayesian R-squared    0.529  0.0330       0.95    0.460     0.586 HDI        
#> 7 Bayesian R-squared    0.529  0.0330       0.95    0.460     0.586 HDI        
#> 8 Bayesian R-squared    0.529  0.0330       0.95    0.460     0.586 HDI        
#>   component   n.obs expression
#>   <chr>       <int> <list>    
#> 1 conditional    88 <language>
#> 2 conditional    88 <language>
#> 3 conditional    88 <language>
#> 4 conditional    88 <language>
#> 5 conditional    88 <language>
#> 6 conditional    88 <language>
#> 7 conditional    88 <language>
#> 8 conditional    88 <language>

between-subjects design

## setup
library(statsExpressions)

# ----------------------- parametric ---------------------------------------

# unequal variance
set.seed(123)
oneway_anova(
  data      = iris,
  x         = Species,
  y         = Sepal.Length,
  type      = "p"
)
#> # A tibble: 1 × 14
#>   statistic    df df.error  p.value
#>       <dbl> <dbl>    <dbl>    <dbl>
#> 1      139.     2     92.2 1.51e-28
#>   method                                                   effectsize estimate
#>   <chr>                                                    <chr>         <dbl>
#> 1 One-way analysis of means (not assuming equal variances) Omega2        0.743
#>   conf.level conf.low conf.high conf.method conf.distribution n.obs expression
#>        <dbl>    <dbl>     <dbl> <chr>       <chr>             <int> <list>    
#> 1       0.95    0.671         1 ncp         F                   150 <language>

# equal variance
set.seed(123)
oneway_anova(
  data      = iris,
  x         = Species,
  y         = Sepal.Length,
  var.equal = TRUE,
  type      = "p"
)
#> # A tibble: 1 × 14
#>   statistic    df df.error  p.value method                    effectsize
#>       <dbl> <dbl>    <dbl>    <dbl> <chr>                     <chr>     
#> 1      119.     2      147 1.67e-31 One-way analysis of means Omega2    
#>   estimate conf.level conf.low conf.high conf.method conf.distribution n.obs
#>      <dbl>      <dbl>    <dbl>     <dbl> <chr>       <chr>             <int>
#> 1    0.612       0.95    0.534         1 ncp         F                   150
#>   expression
#>   <list>    
#> 1 <language>

# ----------------------- non-parametric -----------------------------------

set.seed(123)
oneway_anova(
  data      = iris,
  x         = Species,
  y         = Sepal.Length,
  type      = "np"
)
#> # A tibble: 1 × 15
#>   parameter1   parameter2 statistic df.error  p.value
#>   <chr>        <chr>          <dbl>    <int>    <dbl>
#> 1 Sepal.Length Species         96.9        2 8.92e-22
#>   method                       effectsize      estimate conf.level conf.low
#>   <chr>                        <chr>              <dbl>      <dbl>    <dbl>
#> 1 Kruskal-Wallis rank sum test Epsilon2 (rank)    0.651       0.95    0.595
#>   conf.high conf.method          conf.iterations n.obs expression
#>       <dbl> <chr>                          <int> <int> <list>    
#> 1         1 percentile bootstrap             100   150 <language>

# ----------------------- robust --------------------------------------------

set.seed(123)
oneway_anova(
  data      = iris,
  x         = Species,
  y         = Sepal.Length,
  type      = "r"
)
#> # A tibble: 1 × 12
#>   statistic    df df.error p.value
#>       <dbl> <dbl>    <dbl>   <dbl>
#> 1      112.     2     53.8       0
#>   method                                           
#>   <chr>                                            
#> 1 A heteroscedastic one-way ANOVA for trimmed means
#>   effectsize                         estimate conf.level conf.low conf.high
#>   <chr>                                 <dbl>      <dbl>    <dbl>     <dbl>
#> 1 Explanatory measure of effect size    0.846       0.95    0.759     0.933
#>   n.obs expression
#>   <int> <list>    
#> 1   150 <language>

# ----------------------- Bayesian ---------------------------------------

set.seed(123)
oneway_anova(
  data      = iris,
  x         = Species,
  y         = Sepal.Length,
  type      = "bayes"
)
#> # A tibble: 6 × 18
#>   term                  pd rope.percentage prior.distribution prior.location
#>   <chr>              <dbl>           <dbl> <chr>                       <dbl>
#> 1 mu                 1               0     cauchy                          0
#> 2 Species-setosa     1               0     cauchy                          0
#> 3 Species-versicolor 0.936           0.434 cauchy                          0
#> 4 Species-virginica  1               0     cauchy                          0
#> 5 sig2               1               0     cauchy                          0
#> 6 g_Species          1               0     cauchy                          0
#>   prior.scale    bf10 method                          log_e_bf10
#>         <dbl>   <dbl> <chr>                                <dbl>
#> 1       0.707 1.87e28 Bayes factors for linear models       65.1
#> 2       0.707 1.87e28 Bayes factors for linear models       65.1
#> 3       0.707 1.87e28 Bayes factors for linear models       65.1
#> 4       0.707 1.87e28 Bayes factors for linear models       65.1
#> 5       0.707 1.87e28 Bayes factors for linear models       65.1
#> 6       0.707 1.87e28 Bayes factors for linear models       65.1
#>   effectsize         estimate std.dev conf.level conf.low conf.high conf.method
#>   <chr>                 <dbl>   <dbl>      <dbl>    <dbl>     <dbl> <chr>      
#> 1 Bayesian R-squared    0.612  0.0311       0.95    0.544     0.667 HDI        
#> 2 Bayesian R-squared    0.612  0.0311       0.95    0.544     0.667 HDI        
#> 3 Bayesian R-squared    0.612  0.0311       0.95    0.544     0.667 HDI        
#> 4 Bayesian R-squared    0.612  0.0311       0.95    0.544     0.667 HDI        
#> 5 Bayesian R-squared    0.612  0.0311       0.95    0.544     0.667 HDI        
#> 6 Bayesian R-squared    0.612  0.0311       0.95    0.544     0.667 HDI        
#>   n.obs expression
#>   <int> <list>    
#> 1   150 <language>
#> 2   150 <language>
#> 3   150 <language>
#> 4   150 <language>
#> 5   150 <language>
#> 6   150 <language>

Contingency table analyses

association test

set.seed(123)
library(statsExpressions)

# ------------------------ frequentist -----------------------------

# unpaired
set.seed(123)
contingency_table(
  data   = mtcars,
  x      = am,
  y      = cyl,
  paired = FALSE
)
#> # A tibble: 1 × 13
#>   statistic    df p.value method                     effectsize        estimate
#>       <dbl> <int>   <dbl> <chr>                      <chr>                <dbl>
#> 1      8.74     2  0.0126 Pearson's Chi-squared test Cramer's V (adj.)    0.464
#>   conf.level conf.low conf.high conf.method conf.distribution n.obs expression
#>        <dbl>    <dbl>     <dbl> <chr>       <chr>             <int> <list>    
#> 1       0.95        0         1 ncp         chisq                32 <language>

# paired
## create data structure
paired_data <-
  structure(
    list(
      response_before =
        structure(
          c(1L, 2L, 1L, 2L),
          .Label = c("no", "yes"),
          class = "factor"
        ),
      response_after = structure(
        c(1L, 1L, 2L, 2L),
        .Label = c("no", "yes"),
        class = "factor"
      ),
      Freq = c(65L, 25L, 5L, 5L)
    ),
    class = "data.frame",
    row.names = c(NA, -4L)
  )


set.seed(123)
contingency_table(
  data   = paired_data,
  x      = response_before,
  y      = response_after,
  paired = TRUE,
  counts = "Freq"
)
#> # A tibble: 1 × 12
#>   statistic    df  p.value method                     effectsize estimate
#>       <dbl> <dbl>    <dbl> <chr>                      <chr>         <dbl>
#> 1      13.3     1 0.000261 McNemar's Chi-squared test Cohen's g     0.333
#>   conf.level conf.low conf.high conf.method n.obs expression
#>        <dbl>    <dbl>     <dbl> <chr>       <int> <list>    
#> 1       0.95    0.164     0.427 binomial      100 <language>

# ------------------------ Bayesian -----------------------------

# unpaired
set.seed(123)
contingency_table(
  data   = mtcars,
  x      = am,
  y      = cyl,
  paired = FALSE,
  type   = "bayes"
)
#> # A tibble: 1 × 15
#>   term  conf.level effectsize estimate conf.low conf.high
#>   <chr>      <dbl> <chr>         <dbl>    <dbl>     <dbl>
#> 1 Ratio       0.95 Cramers_v     0.478    0.201     0.714
#>   prior.distribution      prior.location prior.scale  bf10
#>   <chr>                            <dbl>       <dbl> <dbl>
#> 1 independent multinomial              0           1  16.8
#>   method                              conf.method log_e_bf10 n.obs expression
#>   <chr>                               <chr>            <dbl> <int> <list>    
#> 1 Bayesian contingency table analysis ETI               2.82    32 <language>

goodness-of-fit tests

# ------------------------ frequentist -----------------------------

# with counts
set.seed(123)
contingency_table(
  data   = as.data.frame(HairEyeColor),
  x      = Eye,
  counts = Freq,
)
#> # A tibble: 1 × 13
#>   statistic    df  p.value method                                   effectsize 
#>       <dbl> <dbl>    <dbl> <chr>                                    <chr>      
#> 1      133.     3 9.65e-29 Chi-squared test for given probabilities Pearson's C
#>   estimate conf.level conf.low conf.high conf.method conf.distribution n.obs
#>      <dbl>      <dbl>    <dbl>     <dbl> <chr>       <chr>             <int>
#> 1    0.429       0.95    0.374         1 ncp         chisq               592
#>   expression
#>   <list>    
#> 1 <language>

# ------------------------ Bayesian -----------------------------

set.seed(123)
contingency_table(
  data   = as.data.frame(HairEyeColor),
  x      = Eye,
  counts = Freq,
  ratio  = c(0.2, 0.2, 0.3, 0.3),
  type   = "bayes"
)
#> # A tibble: 1 × 4
#>      bf10 prior.scale method                                      expression
#>     <dbl>       <dbl> <chr>                                       <list>    
#> 1 4.17e55           1 Bayesian one-way contingency table analysis <language>

Correlation analyses

set.seed(123)
library(statsExpressions)

# ----------------------- parametric ---------------------------------------

set.seed(123)
corr_test(
  data = mtcars,
  x    = wt,
  y    = mpg,
  type = "parametric"
)
#> # A tibble: 1 × 14
#>   parameter1 parameter2 effectsize          estimate conf.level conf.low
#>   <chr>      <chr>      <chr>                  <dbl>      <dbl>    <dbl>
#> 1 wt         mpg        Pearson correlation   -0.868       0.95   -0.934
#>   conf.high statistic df.error  p.value method              n.obs conf.method
#>       <dbl>     <dbl>    <int>    <dbl> <chr>               <int> <chr>      
#> 1    -0.744     -9.56       30 1.29e-10 Pearson correlation    32 normal     
#>   expression
#>   <list>    
#> 1 <language>

# ----------------------- non-parametric -----------------------------------

set.seed(123)
corr_test(
  data = mtcars,
  x    = wt,
  y    = mpg,
  type = "nonparametric"
)
#> # A tibble: 1 × 13
#>   parameter1 parameter2 effectsize           estimate conf.level conf.low
#>   <chr>      <chr>      <chr>                   <dbl>      <dbl>    <dbl>
#> 1 wt         mpg        Spearman correlation   -0.886       0.95   -0.945
#>   conf.high statistic  p.value method               n.obs conf.method expression
#>       <dbl>     <dbl>    <dbl> <chr>                <int> <chr>       <list>    
#> 1    -0.774    10292. 1.49e-11 Spearman correlation    32 normal      <language>

# ----------------------- robust --------------------------------------------

set.seed(123)
corr_test(
  data = mtcars,
  x    = wt,
  y    = mpg,
  type = "robust"
)
#> # A tibble: 1 × 14
#>   parameter1 parameter2 effectsize                     estimate conf.level
#>   <chr>      <chr>      <chr>                             <dbl>      <dbl>
#> 1 wt         mpg        Winsorized Pearson correlation   -0.864       0.95
#>   conf.low conf.high statistic df.error  p.value method                        
#>      <dbl>     <dbl>     <dbl>    <int>    <dbl> <chr>                         
#> 1   -0.932    -0.738     -9.41       30 1.84e-10 Winsorized Pearson correlation
#>   n.obs conf.method expression
#>   <int> <chr>       <list>    
#> 1    32 normal      <language>

# ----------------------- Bayesian ---------------------------------------

set.seed(123)
corr_test(
  data = mtcars,
  x    = wt,
  y    = mpg,
  type = "bayes"
)
#> # A tibble: 1 × 17
#>   parameter1 parameter2 effectsize                   estimate conf.level
#>   <chr>      <chr>      <chr>                           <dbl>      <dbl>
#> 1 wt         mpg        Bayesian Pearson correlation   -0.843       0.95
#>   conf.low conf.high    pd rope.percentage prior.distribution prior.location
#>      <dbl>     <dbl> <dbl>           <dbl> <chr>                       <dbl>
#> 1   -0.934    -0.734     1               0 beta                         1.41
#>   prior.scale      bf10 method                       n.obs conf.method
#>         <dbl>     <dbl> <chr>                        <int> <chr>      
#> 1        1.41 56223033. Bayesian Pearson correlation    32 HDI        
#>   expression
#>   <list>    
#> 1 <language>

Meta-analysis

library(statsExpressions)
library(metaplus)

# renaming to what `{statsExpressions}` expects
df <- dplyr::rename(mag, estimate = yi, std.error = sei)

# ----------------------- parametric ---------------------------------------

set.seed(123)
meta_analysis(df, type = "parametric")
#> # A tibble: 1 × 13
#>   term    effectsize                     estimate std.error conf.level conf.low
#>   <chr>   <chr>                             <dbl>     <dbl>      <dbl>    <dbl>
#> 1 Overall meta-analytic summary estimate   -0.767     0.212       0.95    -1.18
#>   conf.high statistic  p.value method                        conf.method n.obs
#>       <dbl>     <dbl>    <dbl> <chr>                         <chr>       <int>
#> 1    -0.351     -3.62 0.000295 Meta-analysis using 'metafor' Wald           16
#>   expression
#>   <list>    
#> 1 <language>

# ----------------------- robust --------------------------------------------

set.seed(123)
meta_analysis(df, type = "robust")
#> # A tibble: 1 × 13
#>   term    effectsize                     estimate std.error conf.low conf.high
#>   <chr>   <chr>                             <dbl>     <dbl>    <dbl>     <dbl>
#> 1 Overall meta-analytic summary estimate   -0.746     0.233    -1.26    -0.344
#>   statistic  p.value conf.level method                               
#>       <dbl>    <dbl>      <dbl> <chr>                                
#> 1     -3.20 0.000501       0.95 Robust meta-analysis using 'metaplus'
#>   conf.method n.obs expression
#>   <chr>       <int> <list>    
#> 1 Wald           16 <language>

# ----------------------- Bayesian ---------------------------------------

set.seed(123)
meta_analysis(df, type = "bayes")
#> # A tibble: 2 × 19
#>   term    effectsize                       estimate std.error conf.level
#>   <chr>   <chr>                               <dbl>     <dbl>      <dbl>
#> 1 Overall meta-analytic posterior estimate   -0.649     0.224       0.95
#> 2 tau     meta-analytic posterior estimate    0.487     0.182       0.95
#>   conf.low conf.high  bf10  rhat   ess component prior.distribution
#>      <dbl>     <dbl> <dbl> <dbl> <dbl> <chr>     <chr>             
#> 1   -1.13     -0.241  53.0  1.00 3771. meta      Student's t       
#> 2    0.207     0.905  53.0  1.00 3692. meta      Inverse gamma     
#>   prior.location prior.scale method                                 conf.method
#>            <dbl>       <dbl> <chr>                                  <chr>      
#> 1              0       0.707 Bayesian meta-analysis using 'metaBMA' ETI        
#> 2              1       0.15  Bayesian meta-analysis using 'metaBMA' ETI        
#>   log_e_bf10 n.obs expression
#>        <dbl> <int> <list>    
#> 1       3.97    16 <language>
#> 2       3.97    16 <language>

Centrality description

# ----------------------- parametric -----------------------

set.seed(123)
centrality_description(iris, Species, Sepal.Length)
#> # A tibble: 3 × 14
#>   Species    Sepal.Length std.dev   iqr conf.low conf.high   min   max skewness
#>   <fct>             <dbl>   <dbl> <dbl>    <dbl>     <dbl> <dbl> <dbl>    <dbl>
#> 1 setosa             5.01   0.352 0.400     4.90      5.10   4.3   5.8    0.120
#> 2 versicolor         5.94   0.516 0.7       5.80      6.07   4.9   7      0.105
#> 3 virginica          6.59   0.636 0.750     6.39      6.79   4.9   7.9    0.118
#>   kurtosis n.obs missing.obs expression n.expression          
#>      <dbl> <int>       <int> <list>     <chr>                 
#> 1  -0.253     50           0 <language> "setosa\n(n = 50)"    
#> 2  -0.533     50           0 <language> "versicolor\n(n = 50)"
#> 3   0.0329    50           0 <language> "virginica\n(n = 50)"

# ----------------------- non-parametric -------------------

set.seed(123)
centrality_description(mtcars, am, wt, type = "n")
#> # A tibble: 2 × 14
#>      am    wt   mad   iqr conf.low conf.high   min   max skewness kurtosis n.obs
#>   <dbl> <dbl> <dbl> <dbl>    <dbl>     <dbl> <dbl> <dbl>    <dbl>    <dbl> <int>
#> 1     0  3.52 0.452 0.41      3.44      3.84  2.46  5.42    1.15     1.06     19
#> 2     1  2.32 0.682 0.942     1.94      2.78  1.51  3.57    0.269   -0.654    13
#>   missing.obs expression n.expression 
#>         <int> <list>     <chr>        
#> 1           0 <language> "0\n(n = 19)"
#> 2           0 <language> "1\n(n = 13)"

# ----------------------- robust ---------------------------

set.seed(123)
centrality_description(ToothGrowth, supp, len, type = "r")
#> # A tibble: 2 × 14
#>   supp    len std.dev   iqr conf.low conf.high   min   max skewness kurtosis
#>   <fct> <dbl>   <dbl> <dbl>    <dbl>     <dbl> <dbl> <dbl>    <dbl>    <dbl>
#> 1 OJ     21.7    6.61  10.9     18.1      23.6   8.2  30.9   -0.580   -0.831
#> 2 VC     16.6    8.27  12.5     13.6      19.9   4.2  33.9    0.306   -0.700
#>   n.obs missing.obs expression n.expression  
#>   <int>       <int> <list>     <chr>         
#> 1    30           0 <language> "OJ\n(n = 30)"
#> 2    30           0 <language> "VC\n(n = 30)"

# ----------------------- Bayesian -------------------------

set.seed(123)
centrality_description(sleep, group, extra, type = "b")
#> # A tibble: 2 × 13
#>   group  extra   iqr conf.low conf.high   min   max skewness kurtosis n.obs
#>   <fct>  <dbl> <dbl>    <dbl>     <dbl> <dbl> <dbl>    <dbl>    <dbl> <int>
#> 1 1     0.0579  2.8   -1.29        3.55  -1.6   3.7    0.581   -0.630    10
#> 2 2     0.973   3.82   0.0527      4.79  -0.1   5.5    0.386   -1.42     10
#>   missing.obs expression n.expression 
#>         <int> <list>     <chr>        
#> 1           0 <language> "1\n(n = 10)"
#> 2           0 <language> "2\n(n = 10)"

Pairwise comparisons for one-way design

Between-subjects design

# for reproducibility
set.seed(123)
library(statsExpressions)

# ----------------------- parametric -----------------------

# if `var.equal = TRUE`, then Student's *t*-test will be run
pairwise_comparisons(
  data            = ggplot2::msleep,
  x               = vore,
  y               = brainwt,
  type            = "parametric",
  var.equal       = TRUE,
  paired          = FALSE,
  p.adjust.method = "bonferroni"
)
#> # A tibble: 6 × 6
#>   group1  group2  p.value p.adjust.method test        expression
#>   <chr>   <chr>     <dbl> <chr>           <chr>       <list>    
#> 1 carni   herbi     1     Bonferroni      Student's t <language>
#> 2 carni   insecti   1     Bonferroni      Student's t <language>
#> 3 carni   omni      1     Bonferroni      Student's t <language>
#> 4 herbi   insecti   1     Bonferroni      Student's t <language>
#> 5 herbi   omni      0.979 Bonferroni      Student's t <language>
#> 6 insecti omni      1     Bonferroni      Student's t <language>

# if `var.equal = FALSE`, then Games-Howell test will be run
pairwise_comparisons(
  data            = ggplot2::msleep,
  x               = vore,
  y               = brainwt,
  type            = "parametric",
  var.equal       = FALSE,
  paired          = FALSE,
  p.adjust.method = "bonferroni"
)
#> # A tibble: 6 × 9
#>   group1  group2  statistic p.value alternative distribution p.adjust.method
#>   <chr>   <chr>       <dbl>   <dbl> <chr>       <chr>        <chr>          
#> 1 carni   herbi        2.17       1 two.sided   q            Bonferroni     
#> 2 carni   insecti     -2.17       1 two.sided   q            Bonferroni     
#> 3 carni   omni         1.10       1 two.sided   q            Bonferroni     
#> 4 herbi   insecti     -2.41       1 two.sided   q            Bonferroni     
#> 5 herbi   omni        -1.87       1 two.sided   q            Bonferroni     
#> 6 insecti omni         2.19       1 two.sided   q            Bonferroni     
#>   test         expression
#>   <chr>        <list>    
#> 1 Games-Howell <language>
#> 2 Games-Howell <language>
#> 3 Games-Howell <language>
#> 4 Games-Howell <language>
#> 5 Games-Howell <language>
#> 6 Games-Howell <language>

# ----------------------- non-parametric -------------------

pairwise_comparisons(
  data            = ggplot2::msleep,
  x               = vore,
  y               = brainwt,
  type            = "nonparametric",
  paired          = FALSE,
  p.adjust.method = "none"
)
#> # A tibble: 6 × 9
#>   group1  group2  statistic p.value alternative distribution p.adjust.method
#>   <chr>   <chr>       <dbl>   <dbl> <chr>       <chr>        <chr>          
#> 1 carni   herbi       0.582  0.561  two.sided   z            None           
#> 2 carni   insecti     1.88   0.0595 two.sided   z            None           
#> 3 carni   omni        1.14   0.254  two.sided   z            None           
#> 4 herbi   insecti     1.63   0.102  two.sided   z            None           
#> 5 herbi   omni        0.717  0.474  two.sided   z            None           
#> 6 insecti omni        1.14   0.254  two.sided   z            None           
#>   test  expression
#>   <chr> <list>    
#> 1 Dunn  <language>
#> 2 Dunn  <language>
#> 3 Dunn  <language>
#> 4 Dunn  <language>
#> 5 Dunn  <language>
#> 6 Dunn  <language>

# ----------------------- robust ---------------------------

pairwise_comparisons(
  data            = ggplot2::msleep,
  x               = vore,
  y               = brainwt,
  type            = "robust",
  paired          = FALSE,
  p.adjust.method = "fdr"
)
#> # A tibble: 6 × 10
#>   group1  group2  estimate conf.level conf.low conf.high p.value p.adjust.method
#>   <chr>   <chr>      <dbl>      <dbl>    <dbl>     <dbl>   <dbl> <chr>          
#> 1 carni   herbi   -0.0323        0.95  -0.248     0.184    0.790 FDR            
#> 2 carni   insecti  0.0451        0.95  -0.0484    0.139    0.552 FDR            
#> 3 carni   omni     0.00520       0.95  -0.114     0.124    0.898 FDR            
#> 4 herbi   insecti  0.0774        0.95  -0.133     0.288    0.552 FDR            
#> 5 herbi   omni     0.0375        0.95  -0.182     0.257    0.790 FDR            
#> 6 insecti omni    -0.0399        0.95  -0.142     0.0625   0.552 FDR            
#>   test                 expression
#>   <chr>                <list>    
#> 1 Yuen's trimmed means <language>
#> 2 Yuen's trimmed means <language>
#> 3 Yuen's trimmed means <language>
#> 4 Yuen's trimmed means <language>
#> 5 Yuen's trimmed means <language>
#> 6 Yuen's trimmed means <language>

# ----------------------- Bayesian -------------------------

pairwise_comparisons(
  data   = ggplot2::msleep,
  x      = vore,
  y      = brainwt,
  type   = "bayes",
  paired = FALSE
)
#> # A tibble: 6 × 19
#>   group1  group2  term       effectsize      estimate conf.level conf.low
#>   <chr>   <chr>   <chr>      <chr>              <dbl>      <dbl>    <dbl>
#> 1 carni   herbi   Difference Bayesian t-test  -0.376        0.95  -1.36  
#> 2 carni   insecti Difference Bayesian t-test   0.0339       0.95  -0.0440
#> 3 carni   omni    Difference Bayesian t-test  -0.0447       0.95  -0.241 
#> 4 herbi   insecti Difference Bayesian t-test   0.353        0.95  -0.733 
#> 5 herbi   omni    Difference Bayesian t-test   0.364        0.95  -0.313 
#> 6 insecti omni    Difference Bayesian t-test  -0.0766       0.95  -0.337 
#>   conf.high    pd rope.percentage prior.distribution prior.location prior.scale
#>       <dbl> <dbl>           <dbl> <chr>                       <dbl>       <dbl>
#> 1     0.508 0.800           0.171 cauchy                          0       0.707
#> 2     0.127 0.812           0.136 cauchy                          0       0.707
#> 3     0.145 0.688           0.214 cauchy                          0       0.707
#> 4     1.62  0.742           0.178 cauchy                          0       0.707
#> 5     1.10  0.848           0.164 cauchy                          0       0.707
#> 6     0.164 0.743           0.171 cauchy                          0       0.707
#>    bf10 conf.method log_e_bf10 n.obs expression test       
#>   <dbl> <chr>            <dbl> <int> <list>     <chr>      
#> 1 0.540 ETI             -0.617    29 <language> Student's t
#> 2 0.718 ETI             -0.332    14 <language> Student's t
#> 3 0.427 ETI             -0.851    26 <language> Student's t
#> 4 0.540 ETI             -0.616    25 <language> Student's t
#> 5 0.571 ETI             -0.560    37 <language> Student's t
#> 6 0.545 ETI             -0.606    22 <language> Student's t

Within-subjects design

# for reproducibility
set.seed(123)

# ----------------------- parametric -----------------------

pairwise_comparisons(
  data            = bugs_long,
  x               = condition,
  y               = desire,
  subject.id      = subject,
  type            = "parametric",
  paired          = TRUE,
  p.adjust.method = "BH"
)
#> # A tibble: 6 × 6
#>   group1 group2  p.value p.adjust.method test        expression
#>   <chr>  <chr>     <dbl> <chr>           <chr>       <list>    
#> 1 HDHF   HDLF   1.06e- 3 FDR             Student's t <language>
#> 2 HDHF   LDHF   7.02e- 2 FDR             Student's t <language>
#> 3 HDHF   LDLF   3.95e-12 FDR             Student's t <language>
#> 4 HDLF   LDHF   6.74e- 2 FDR             Student's t <language>
#> 5 HDLF   LDLF   1.99e- 3 FDR             Student's t <language>
#> 6 LDHF   LDLF   6.66e- 9 FDR             Student's t <language>

# ----------------------- non-parametric -------------------

pairwise_comparisons(
  data            = bugs_long,
  x               = condition,
  y               = desire,
  subject.id      = subject,
  type            = "nonparametric",
  paired          = TRUE,
  p.adjust.method = "BY"
)
#> # A tibble: 6 × 9
#>   group1 group2 statistic  p.value alternative distribution p.adjust.method
#>   <chr>  <chr>      <dbl>    <dbl> <chr>       <chr>        <chr>          
#> 1 HDHF   HDLF        4.78 1.44e- 5 two.sided   t            BY             
#> 2 HDHF   LDHF        2.44 4.47e- 2 two.sided   t            BY             
#> 3 HDHF   LDLF        8.01 5.45e-13 two.sided   t            BY             
#> 4 HDLF   LDHF        2.34 4.96e- 2 two.sided   t            BY             
#> 5 HDLF   LDLF        3.23 5.05e- 3 two.sided   t            BY             
#> 6 LDHF   LDLF        5.57 4.64e- 7 two.sided   t            BY             
#>   test           expression
#>   <chr>          <list>    
#> 1 Durbin-Conover <language>
#> 2 Durbin-Conover <language>
#> 3 Durbin-Conover <language>
#> 4 Durbin-Conover <language>
#> 5 Durbin-Conover <language>
#> 6 Durbin-Conover <language>

# ----------------------- robust ---------------------------

pairwise_comparisons(
  data            = bugs_long,
  x               = condition,
  y               = desire,
  subject.id      = subject,
  type            = "robust",
  paired          = TRUE,
  p.adjust.method = "hommel"
)
#> # A tibble: 6 × 11
#>   group1 group2 estimate conf.level conf.low conf.high     p.value  p.crit
#>   <chr>  <chr>     <dbl>      <dbl>    <dbl>     <dbl>       <dbl>   <dbl>
#> 1 HDHF   HDLF      1.03        0.95   0.140      1.92  0.00999     0.0127 
#> 2 HDHF   LDHF      0.454       0.95  -0.104      1.01  0.0520      0.025  
#> 3 HDHF   LDLF      1.95        0.95   1.09       2.82  0.000000564 0.00851
#> 4 HDLF   LDHF     -0.676       0.95  -1.61       0.256 0.0520      0.05   
#> 5 HDLF   LDLF      0.889       0.95   0.0244     1.75  0.0203      0.0169 
#> 6 LDHF   LDLF      1.35        0.95   0.560      2.14  0.000102    0.0102 
#>   p.adjust.method test                 expression
#>   <chr>           <chr>                <list>    
#> 1 Hommel          Yuen's trimmed means <language>
#> 2 Hommel          Yuen's trimmed means <language>
#> 3 Hommel          Yuen's trimmed means <language>
#> 4 Hommel          Yuen's trimmed means <language>
#> 5 Hommel          Yuen's trimmed means <language>
#> 6 Hommel          Yuen's trimmed means <language>

# ----------------------- Bayesian -------------------------

pairwise_comparisons(
  data       = bugs_long,
  x          = condition,
  y          = desire,
  subject.id = subject,
  type       = "bayes",
  paired     = TRUE,
  bf.prior   = 0.77
)
#> # A tibble: 6 × 19
#>   group1 group2 term       effectsize      estimate conf.level conf.low
#>   <chr>  <chr>  <chr>      <chr>              <dbl>      <dbl>    <dbl>
#> 1 HDHF   HDLF   Difference Bayesian t-test    1.10        0.95   0.491 
#> 2 HDHF   LDHF   Difference Bayesian t-test    0.453       0.95  -0.0550
#> 3 HDHF   LDLF   Difference Bayesian t-test    2.13        0.95   1.62  
#> 4 HDLF   LDHF   Difference Bayesian t-test   -0.653       0.95  -1.32  
#> 5 HDLF   LDLF   Difference Bayesian t-test    0.980       0.95   0.383 
#> 6 LDHF   LDLF   Difference Bayesian t-test    1.66        0.95   1.15  
#>   conf.high    pd rope.percentage prior.distribution prior.location prior.scale
#>       <dbl> <dbl>           <dbl> <chr>                       <dbl>       <dbl>
#> 1    1.73   1               0     cauchy                          0        0.77
#> 2    0.953  0.954           0.188 cauchy                          0        0.77
#> 3    2.64   1               0     cauchy                          0        0.77
#> 4    0.0552 0.968           0.164 cauchy                          0        0.77
#> 5    1.60   0.999           0     cauchy                          0        0.77
#> 6    2.15   1               0     cauchy                          0        0.77
#>       bf10 conf.method log_e_bf10 n.obs expression test       
#>      <dbl> <chr>            <dbl> <int> <list>     <chr>      
#> 1 3.95e+ 1 ETI              3.68     88 <language> Student's t
#> 2 5.42e- 1 ETI             -0.612    88 <language> Student's t
#> 3 1.22e+10 ETI             23.2      88 <language> Student's t
#> 4 6.50e- 1 ETI             -0.430    88 <language> Student's t
#> 5 1.72e+ 1 ETI              2.84     88 <language> Student's t
#> 6 4.78e+ 6 ETI             15.4      88 <language> Student's t

Suggestions

If you find any bugs or have any suggestions/remarks, please file an issue on GitHub: https://github.com/IndrajeetPatil/statsExpressions/issues