The project provides a basic analysis of the ToothGrowth
data in the R datasets package. The data gives the length of the odontoblasts (cells responsible for tooth growth) in 60 guinea pigs, 10 for each combination of three dosage levels (0.5, 1, and 2 mg) and two supplements (Vitamin C and Orange Juice). The following will occur:
supp
and dose
.

Load the libraries needed for loading, manipulating, and plotting the data, then read the `ToothGrowth` dataset into a `data.table`.
library(data.table)
library(ggplot2)
library(gridExtra)
library(dplyr)
library(printr)
# Bring the built-in ToothGrowth data set into the workspace.
data(ToothGrowth)
# Work on a data.table copy so the original data set stays untouched.
tooth_growth <- data.table(ToothGrowth)
# Show the column types and the first few values of each column.
str(tooth_growth)
## Classes 'data.table' and 'data.frame': 60 obs. of 3 variables:
## $ len : num 4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
## $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
## $ dose: num 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
## - attr(*, ".internal.selfref")=<externalptr>
# Preview the first six observations (six is head()'s default row count).
head(tooth_growth, n = 6L)
len | supp | dose |
---|---|---|
4.2 | VC | 0.5 |
11.5 | VC | 0.5 |
7.3 | VC | 0.5 |
5.8 | VC | 0.5 |
6.4 | VC | 0.5 |
10.0 | VC | 0.5 |
# Per-column summary: quartiles for the numeric columns, counts for `supp`.
summary(object = tooth_growth)
len | supp | dose | |
---|---|---|---|
Min. : 4.20 | OJ:30 | Min. :0.500 | |
1st Qu.:13.07 | VC:30 | 1st Qu.:0.500 | |
Median :19.25 | NA | Median :1.000 | |
Mean :18.81 | NA | Mean :1.167 | |
3rd Qu.:25.27 | NA | 3rd Qu.:2.000 | |
Max. :33.90 | NA | Max. :2.000 |
Convert the `dose` variable to a factor to make the plotting and t-tests easier, then plot a grid of box plots showing how `len` depends on `supp` and `dose`, both separately and in combination.
# Treat dose as a categorical variable so each level gets its own box and
# fill colour in the plots below.
tooth_growth$dose <- as.factor(tooth_growth$dose)

# Length by dose, one panel per supplement.
# The documented value for hiding the legend is "none" (lower case).
plot1 <- ggplot(data = tooth_growth, aes(x = dose, y = len)) +
  geom_boxplot(aes(fill = dose)) +
  facet_grid(. ~ supp) +
  theme(legend.position = "none") +
  xlab(" ") + ylab("Length")

# Length by supplement, one panel per dose.
plot2 <- ggplot(data = tooth_growth, aes(x = supp, y = len)) +
  geom_boxplot(aes(fill = supp)) +
  facet_grid(. ~ dose) +
  theme(legend.position = "none") +
  xlab(" ") + ylab(" ")

# Length by supplement, all doses pooled.
plot3 <- ggplot(data = tooth_growth, aes(x = supp, y = len)) +
  geom_boxplot(aes(fill = supp)) +
  theme(legend.position = "none") +
  xlab("Supplement") + ylab(" ")

# Length by dose, both supplements pooled.
plot4 <- ggplot(data = tooth_growth, aes(x = dose, y = len)) +
  geom_boxplot(aes(fill = dose)) +
  theme(legend.position = "none") +
  xlab("Dosage (mg)") + ylab("Length")

# 2 x 2 grid: faceted plots on the top row, pooled plots on the bottom row,
# with the dose-based plots in the left column.
grid.arrange(plot1, plot2, plot4, plot3, nrow = 2, ncol = 2)
The goal of the project is to analyze the effect of the different supplements (Vitamin C and Orange Juice) at each of the dose levels (0.5, 1, and 2 mg). The following code performs t-tests for all of the relevant combinations. All of the tests were performed at a \(95\%\) confidence level, with unequal variances assumed.
# Welch two-sample t-test of length by supplement, pooling all dose levels.
# var.equal and conf.level are t.test()'s defaults, spelled out here.
OJvsVC <- t.test(len ~ supp, data = tooth_growth,
                 var.equal = FALSE, conf.level = 0.95)
#' Welch two-sample t-test of tooth length between two dose levels.
#'
#' Both supplements are pooled; only `dose` selects the samples.
#'
#' @param fac1 Dose level of the first sample (e.g. 0.5).
#' @param fac2 Dose level of the second sample.
#' @param data Data set with `len` and `dose` columns. Defaults to the global
#'   `tooth_growth`, so existing two-argument calls keep working.
#' @return The `htest` object returned by `t.test()`; its statistic and
#'   confidence interval refer to mean(fac1 sample) - mean(fac2 sample).
my.t.test <- function(fac1, fac2, data = tooth_growth) {
  # `dose` may be numeric or a factor. Converting through character makes the
  # comparison numeric either way, instead of relying on implicit coercion of
  # the requested level to a factor label.
  dose_value <- as.numeric(as.character(data$dose))
  t.test(data$len[dose_value == as.numeric(fac1)],
         data$len[dose_value == as.numeric(fac2)])
}
#' Welch two-sample t-test between two supplement/dose groups.
#'
#' Keeps the rows for (`supp1`, `dose1`) and (`supp2`, `dose2`) and tests
#' `len ~ supp`, so `supp1` and `supp2` must be different supplements.
#'
#' @param supp1,supp2 Supplement labels (e.g. "VC", "OJ").
#' @param dose1,dose2 Dose level paired with `supp1` / `supp2`.
#' @param data Data set with `len`, `supp` and `dose` columns. Defaults to the
#'   global `tooth_growth`, so existing four-argument calls keep working.
#' @return The `htest` object returned by `t.test()`. Groups follow the order
#'   of the `supp` values, not the order of the arguments.
supp.t.test <- function(supp1, dose1, supp2, dose2, data = tooth_growth) {
  # Compare dose numerically whether the column is numeric or a factor.
  dose_value <- as.numeric(as.character(data$dose))
  keep <- (dose_value == as.numeric(dose1) & data$supp == supp1) |
    (dose_value == as.numeric(dose2) & data$supp == supp2)
  # Base row subsetting: works for data.frame and data.table alike and does
  # not depend on which package's `filter` is currently on the search path.
  sub_tooth <- data[which(keep), ]
  t.test(len ~ supp, data = sub_tooth)
}
#' Welch two-sample t-test between two dose levels of one supplement.
#'
#' Keeps the rows of supplement `supp1` at `dose1` or `dose2` and tests
#' `len ~ dose`, so the two dose levels must differ.
#'
#' @param supp1 Supplement label (e.g. "VC" or "OJ").
#' @param dose1,dose2 The two dose levels to compare.
#' @param data Data set with `len`, `supp` and `dose` columns. Defaults to the
#'   global `tooth_growth`, so existing three-argument calls keep working.
#' @return The `htest` object returned by `t.test()`. Groups follow the order
#'   of the `dose` values, not the order of the arguments.
same_supp.t.test <- function(supp1, dose1, dose2, data = tooth_growth) {
  # Compare dose numerically whether the column is numeric or a factor.
  dose_value <- as.numeric(as.character(data$dose))
  keep <- data$supp == supp1 &
    (dose_value == as.numeric(dose1) | dose_value == as.numeric(dose2))
  # Base row subsetting: works for data.frame and data.table alike and does
  # not depend on which package's `filter` is currently on the search path.
  sub_tooth <- data[which(keep), ]
  t.test(len ~ dose, data = sub_tooth)
}
# Pooled dose comparisons: 0.5 vs 1, 0.5 vs 2 and 1 vs 2.
my_tests <- Map(my.t.test, c(0.5, 0.5, 1), c(1, 2, 2))

# All nine VC-dose / OJ-dose pairings: d1 holds the VC dose, d2 the OJ dose.
d1 <- c(0.5, 1, 2, 0.5, 0.5, 1, 2, 1, 2)
d2 <- c(0.5, 0.5, 0.5, 1, 2, 1, 1, 2, 2)
supp_tests <- Map(supp.t.test, rep("VC", 9), d1, rep("OJ", 9), d2)

# Dose comparisons within each supplement: d3 is the supplement, d4 and d5
# are the two dose levels being compared.
d3 <- rep(c("OJ", "VC"), each = 3)
d4 <- rep(c(1, 2, 2), times = 2)
d5 <- rep(c(0.5, 0.5, 1), times = 2)
same_supp_tests <- Map(same_supp.t.test, d3, d4, d5)
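The following code takes all of the t-test results from the previous section and places them in a `data.frame` for easier reading and quicker viewing. Note that all values have been rounded to four decimal places, and that `t.stat` and the confidence limits are reported for `mean.B - mean.A`, whereas `mean.diff` is `mean.A - mean.B`, so their signs are opposite.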
# Gather every test result in the same order as the row labels below.
t_table <- c(list(OJvsVC), my_tests, same_supp_tests, supp_tests)

rnames <- c("VC vs OJ",
            "1.0 vs 0.5", "2.0 vs 0.5", "2.0 vs 1.0",
            "OJ-1.0 vs 0.5", "OJ-2.0 vs 0.5", "OJ-2.0 vs 1.0",
            "VC-1.0 vs 0.5", "VC-2.0 vs 0.5", "VC-2.0 vs 1.0",
            "VC-0.5 vs OJ-0.5", "VC-1.0 vs OJ-0.5", "VC-2.0 vs OJ-0.5",
            "VC-0.5 vs OJ-1.0", "VC-0.5 vs OJ-2.0", "VC-1.0 vs OJ-1.0",
            "VC-2.0 vs OJ-1.0", "VC-1.0 vs OJ-2.0", "VC-2.0 vs OJ-2.0")

# Fail early if the list of tests and the row labels ever get out of step,
# instead of relying on a hard-coded count.
stopifnot(length(t_table) == length(rnames))

# Pull one number per test; `[[` drops the element names t.test() attaches.
t.stat <- vapply(t_table, function(tt) tt$statistic[[1]],
                 numeric(1), USE.NAMES = FALSE)
df <- vapply(t_table, function(tt) tt$parameter[[1]],
             numeric(1), USE.NAMES = FALSE)
lower.CL <- vapply(t_table, function(tt) tt$conf.int[[1]],
                   numeric(1), USE.NAMES = FALSE)
upper.CL <- vapply(t_table, function(tt) tt$conf.int[[2]],
                   numeric(1), USE.NAMES = FALSE)
p.value <- vapply(t_table, function(tt) tt$p.value[[1]],
                  numeric(1), USE.NAMES = FALSE)
# The first estimate is labelled "B" and the second "A" so that each row
# label reads "A vs B".
mean.B <- vapply(t_table, function(tt) tt$estimate[[1]],
                 numeric(1), USE.NAMES = FALSE)
mean.A <- vapply(t_table, function(tt) tt$estimate[[2]],
                 numeric(1), USE.NAMES = FALSE)

# NOTE(review): t.test() reports the statistic and confidence interval for
# estimate[1] - estimate[2] (mean.B - mean.A), so t.stat, lower.CL and
# upper.CL have the opposite sign to mean.diff below.
mean.diff <- mean.A - mean.B

t_results <- data.frame("t.stat" = t.stat, "df" = df,
                        "lower.CL" = lower.CL, "upper.CL" = upper.CL,
                        "p.value" = p.value, "mean.A" = mean.A,
                        "mean.B" = mean.B, "mean.diff" = mean.diff,
                        row.names = rnames)
knitr::kable(round(t_results, 4), caption = "Welch Two Sample t-test Results")
t.stat | df | lower.CL | upper.CL | p.value | mean.A | mean.B | mean.diff | |
---|---|---|---|---|---|---|---|---|
VC vs OJ | 1.9153 | 55.3094 | -0.1710 | 7.5710 | 0.0606 | 16.9633 | 20.6633 | -3.700 |
1.0 vs 0.5 | -6.4766 | 37.9864 | -11.9838 | -6.2762 | 0.0000 | 19.7350 | 10.6050 | 9.130 |
2.0 vs 0.5 | -11.7990 | 36.8826 | -18.1562 | -12.8338 | 0.0000 | 26.1000 | 10.6050 | 15.495 |
2.0 vs 1.0 | -4.9005 | 37.1011 | -8.9965 | -3.7335 | 0.0000 | 26.1000 | 19.7350 | 6.365 |
OJ-1.0 vs 0.5 | -5.0486 | 17.6983 | -13.4156 | -5.5244 | 0.0001 | 22.7000 | 13.2300 | 9.470 |
OJ-2.0 vs 0.5 | -7.8170 | 14.6678 | -16.3352 | -9.3248 | 0.0000 | 26.0600 | 13.2300 | 12.830 |
OJ-2.0 vs 1.0 | -2.2478 | 15.8424 | -6.5314 | -0.1886 | 0.0392 | 26.0600 | 22.7000 | 3.360 |
VC-1.0 vs 0.5 | -7.4634 | 17.8624 | -11.2657 | -6.3143 | 0.0000 | 16.7700 | 7.9800 | 8.790 |
VC-2.0 vs 0.5 | -10.3878 | 14.3271 | -21.9015 | -14.4185 | 0.0000 | 26.1400 | 7.9800 | 18.160 |
VC-2.0 vs 1.0 | -5.4698 | 13.6000 | -13.0543 | -5.6857 | 0.0001 | 26.1400 | 16.7700 | 9.370 |
VC-0.5 vs OJ-0.5 | 3.1697 | 14.9688 | 1.7191 | 8.7809 | 0.0064 | 7.9800 | 13.2300 | -5.250 |
VC-1.0 vs OJ-0.5 | -2.1864 | 14.1997 | -7.0081 | -0.0719 | 0.0460 | 16.7700 | 13.2300 | 3.540 |
VC-2.0 vs OJ-0.5 | -6.2325 | 17.9048 | -17.2635 | -8.5565 | 0.0000 | 26.1400 | 13.2300 | 12.910 |
VC-0.5 vs OJ-1.0 | 9.7401 | 16.1408 | 11.5185 | 17.9215 | 0.0000 | 7.9800 | 22.7000 | -14.720 |
VC-0.5 vs OJ-2.0 | 14.9665 | 17.9793 | 15.5418 | 20.6182 | 0.0000 | 7.9800 | 26.0600 | -18.080 |
VC-1.0 vs OJ-1.0 | 4.0328 | 15.3577 | 2.8021 | 9.0579 | 0.0010 | 16.7700 | 22.7000 | -5.930 |
VC-2.0 vs OJ-1.0 | -1.7574 | 17.2972 | -7.5643 | 0.6843 | 0.0965 | 26.1400 | 22.7000 | 3.440 |
VC-1.0 vs OJ-2.0 | 8.0325 | 17.9476 | 6.8597 | 11.7203 | 0.0000 | 16.7700 | 26.0600 | -9.290 |
VC-2.0 vs OJ-2.0 | -0.0461 | 14.0398 | -3.7981 | 3.6381 | 0.9639 | 26.1400 | 26.0600 | 0.080 |
The tests failed to reject the null hypothesis (the difference in means is equal to zero) in three of the cases. Since all tests were performed at a \(95\%\) confidence level, a test fails to reject when its p-value is greater than \(0.05\) or, equivalently, when its confidence interval contains zero. The three cases are:
- `VC vs OJ`, p-value of \(0.0606\)
- `VC-2.0 vs OJ-1.0`, p-value of \(0.0965\)
- `VC-2.0 vs OJ-2.0`, p-value of \(0.9639\)

Based on the analysis performed we can conclude that as `dose` levels increase, tooth growth also increases. We can also conclude that at the lower dose levels (0.5 and 1.0 mg) there is a statistically significant difference between the means for `OJ` and `VC`, with `OJ` being more effective on tooth growth, but at 2.0 mg `OJ` and `VC` show no significant difference.
Further study could be conducted to see whether higher dosages (above 2.0 mg) would be more effective for `VC` or `OJ`. The increase in the mean for `OJ` seems to be slowing more quickly than for `VC`, as can be seen in the differences of means between dose levels (for `OJ`, 9.47 from 0.5 to 1.0 mg but only 3.36 from 1.0 to 2.0 mg; for `VC`, 8.79 and 9.37). `VC` may therefore be more effective than `OJ` at higher dose levels.
These assumptions are based on the following: