Add HW2 and HW3

2025-12-25 21:11:55 -04:00
parent effc930a7d
commit 7ae72064a4
2 changed files with 191 additions and 0 deletions
--- a/HW2.Rmd
+++ b/HW2.Rmd
@@ -9,8 +9,90 @@ output:
    df_print: paged
 ---

+```{r}
+library(tidyverse)
+dd <- beaver2  # built-in dataset; Question 1 uses its temp and activ columns (dd is reassigned in Question 2)
+```
+
 # Question 1

+$\mu_0$ = mean temperature when `activ` = 0
+
+$\mu_1$ = mean temperature when `activ` = 1
+
+$$ H_0 : \mu_0 = \mu_1, \quad H_1 : \mu_0 \neq \mu_1 $$
+
+```{r}
+t.test(temp ~ activ, data = dd)  # two-sample t-test of temp by activ; t.test() defaults to var.equal = FALSE (Welch)
+```
+
+We reject $H_0$ in favour of $H_1$ and conclude that mean temperature differs by activity level.
+
+Now manually:
+
+```{r}
+# Per-group sample size, mean, and variance of temp.
+summary_stats <- summarise(
+  group_by(dd, activ),
+  n = n(),
+  mean_temp = mean(temp),
+  var_temp = var(temp)
+)
+
+summary_stats
+```
+
+Compute the standard error and the t statistic:
+
+```{r}
+x0 <- with(summary_stats, mean_temp[activ == 0])
+x1 <- with(summary_stats, mean_temp[activ == 1])
+
+s0 <- with(summary_stats, var_temp[activ == 0])  # variance, not SD
+s1 <- with(summary_stats, var_temp[activ == 1])  # variance, not SD
+
+n0 <- with(summary_stats, n[activ == 0])
+n1 <- with(summary_stats, n[activ == 1])
+
+SE <- sqrt(s0 / n0 + s1 / n1)
+
+t_stat <- (x0 - x1) / SE
+t_stat
+```
+
+Compute the Welch-Satterthwaite degrees of freedom:
+
+```{r}
+df <- (s0/n0 + s1/n1)^2 /  # Welch-Satterthwaite degrees of freedom; s0 and s1 hold the group variances
+  ((s0/n0)^2/(n0-1) + (s1/n1)^2/(n1-1))
+
+df  # print the result (note: this variable shares its name with stats::df())
+```
+
+Compute p-value
+
+```{r}
+p_value <- 2 * pt(-abs(t_stat), df)  # two-sided p-value: twice the lower-tail t probability at -|t_stat|
+p_value  # print the p-value
+```
+
+The manually computed p-value leads to the same conclusion as `t.test`: we reject $H_0$ in favour of $H_1$ and conclude that mean temperature differs by activity level.

 # Question 2

+```{r}
+# Keep only the two species being compared.
+dd <- filter(iris, Species %in% c("setosa", "versicolor"))
+```
+
+$\mu_0$ = mean `Sepal.Length` for setosa
+
+$\mu_1$ = mean `Sepal.Length` for versicolor
+
+$$ H_0 : \mu_0 = \mu_1, \quad H_1 : \mu_0 \neq \mu_1 $$
+
+```{r}
+t.test(Sepal.Length ~ Species, data = dd)  # two-sample t-test of Sepal.Length by Species on the filtered data
+```
+
+The p-value is \< 0.05, so we reject $H_0$ in favour of $H_1$. This indicates a statistically significant difference in mean `Sepal.Length` between setosa and versicolor.
--- a/HW3.Rmd
+++ b/HW3.Rmd
@@ -0,0 +1,109 @@
+---
+title: "Assignment 3"
+subtitle: "STAT3373"
+author: "Isaac Shoebottom"
+date: "Oct 2nd, 2025"
+output:
+  pdf_document: default
+  html_document:
+    df_print: paged
+---
+```{r message=FALSE, warning=FALSE}
+library(tidyverse)
+library(broom)
+library(car)
+library(effectsize)
+library(knitr)
+library(kableExtra)
+```
+
+```{r}
+# Tomato yields for four fertilizers, five observations each
+fertilizer_data <- data.frame(
+  yield = c(
+    4.2, 4.5, 4.1, 4.8, 4.3, # Fertilizer A
+    5.1, 5.3, 4.9, 5.2, 5.0, # Fertilizer B
+    3.8, 4.0, 3.6, 4.2, 3.9, # Fertilizer C
+    5.5, 5.8, 5.4, 5.7, 5.6  # Fertilizer D
+  ),
+  fertilizer = factor(rep(LETTERS[1:4], each = 5))
+)
+
+fertilizer_data %>%
+  kable(caption = "Tomato Yield Data") %>%
+  kable_styling(bootstrap_options = c("striped", "hover"))
+
+# Group-wise mean, standard deviation, and sample size
+fertilizer_data %>%
+  group_by(fertilizer) %>%
+  summarise(mean = mean(yield), sd = sd(yield), n = n())
+```
+
+# Question 1
+
+## a)
+
+```{r}
+# Boxplot of yield per fertilizer with the raw observations overlaid.
+ggplot(fertilizer_data, aes(x = fertilizer, y = yield)) +
+  geom_boxplot(fill = "lightblue") +
+  # height = 0: jitter horizontally only so the plotted yields stay exact.
+  geom_jitter(width = 0.1, height = 0) +
+  labs(title = "Tomato Yield by Fertilizer", x = "Fertilizer", y = "Yield (kg)")
+```
+
+Analysis:
+
+-   Fertilizer D appears to produce the highest yields.
+
+-   Fertilizer C appears lowest.
+
+-   Variability is similar across groups.
+
+## b)
+
+```{r}
+fertilizer_aov <- aov(yield ~ fertilizer, data = fertilizer_data)  # one-way ANOVA: yield by fertilizer
+summary(fertilizer_aov)  # print the ANOVA table
+```
+
+The p-value is \< 0.05, so we reject $H_0$ (all fertilizers have the same mean yield). There is a statistically significant difference in mean yield among fertilizers.
+
+# Question 2
+
+## a)
+
+```{r}
+teaching_data <- tibble(
+  score = c(78,82,75,80,77,83,79,  # Traditional
+            85,88,84,87,86,90,83,  # Interactive
+            81,79,83,85,82,78,80), # Online
+  method = factor(rep(c("Traditional", "Interactive", "Online"), each = 7))  # 7 scores per method, in row order
+)
+```
+
+## b)
+
+```{r}
+teaching_aov <- aov(score ~ method, data = teaching_data)  # one-way ANOVA: score by teaching method
+summary(teaching_aov)  # print the ANOVA table
+```
+
+The p-value is \< 0.05, so we reject $H_0$ (all teaching methods have the same mean score); significant differences exist among the methods.
+
+# Question 3
+
+```{r}
+supplier_data <- tibble(
+  strength = c(245, 250, 248, 252, 249, 247,  # Supplier 1
+               240, 238, 242, 241, 239, 243,  # Supplier 2
+               255, 258, 254, 257, 256, 253,  # Supplier 3
+               248, 246, 250, 249, 247, 251), # Supplier 4
+  supplier = factor(paste("Supplier", rep(1:4, each = 6)))
+)
+
+supplier_aov <- aov(strength ~ supplier, data = supplier_data)
+summary(supplier_aov)
+```
+
+The p-value is \< 0.01, so we reject $H_0$ (all suppliers have the same mean strength); significant differences exist among the suppliers.