Add HW2 and HW3

2025-12-25 21:11:55 -04:00
parent effc930a7d
commit 7ae72064a4
2 changed files with 191 additions and 0 deletions
--- a/HW2.Rmd
+++ b/HW2.Rmd
@@ -9,8 +9,90 @@ output:
    df_print: paged
 ---
 ```{r}
 library(tidyverse)
 # Work on the beaver2 data under a short name; the analysis below uses its
 # temp and activ columns.
 dd <- beaver2
 ```
 # Question 1
 $\mu_0$ = mean temperature when `activ` = 0
 $\mu_1$ = mean temperature when `activ` = 1
 $$ H_0 : \mu_0 = \mu_1, \quad H_1 : \mu_0 \neq \mu_1 $$
 ```{r}
 # Welch two-sample t-test (unequal variances). var.equal = FALSE is the
 # default; it is spelled out to make the choice of test explicit.
 t.test(temp ~ activ, data = dd, var.equal = FALSE)
 ```
 We reject $H_0$ in favour of $H_1$ and conclude that mean temperature differs by activity level.
 Now manually:
 ```{r}
 # Per-activity-level sample size, mean and variance of temperature.
 # These are the ingredients of the hand-computed test statistic.
 summary_stats <- summarise(
   group_by(dd, activ),
   n = n(),
   mean_temp = mean(temp),
   var_temp = var(temp)
 )
 summary_stats
 ```
 Compute Standard Error and t stat
 ```{r}
 # Group means for activ == 0 and activ == 1.
 x0 <- with(summary_stats, mean_temp[activ == 0])
 x1 <- with(summary_stats, mean_temp[activ == 1])
 # NOTE: s0 and s1 hold the sample *variances* (s^2), not standard deviations.
 s0 <- with(summary_stats, var_temp[activ == 0])
 s1 <- with(summary_stats, var_temp[activ == 1])
 # Group sizes.
 n0 <- with(summary_stats, n[activ == 0])
 n1 <- with(summary_stats, n[activ == 1])
 # Unpooled standard error of the difference in means, then the t statistic.
 SE <- sqrt(s0 / n0 + s1 / n1)
 t_stat <- (x0 - x1) / SE
 t_stat
 ```
 Compute the degrees of freedom (Welch–Satterthwaite approximation)
 ```{r}
 # Welch-Satterthwaite approximation to the degrees of freedom.
 # s0 and s1 are variances, so s / n is each group's variance of the mean.
 # local() keeps the intermediate terms out of the global environment.
 df <- local({
   vm0 <- s0 / n0
   vm1 <- s1 / n1
   (vm0 + vm1)^2 / (vm0^2 / (n0 - 1) + vm1^2 / (n1 - 1))
 })
 df
 ```
 Compute p-value
 ```{r}
 # Two-sided p-value: the t distribution is symmetric, so double the
 # lower-tail area below -|t|.
 p_value <- pt(-abs(t_stat), df = df) * 2
 p_value
 ```
 The manually computed p-value leads to the same conclusion as `t.test()`: we reject $H_0$ in favour of $H_1$ and conclude that mean temperature differs by activity level.
 # Question 2
 ```{r}
 # Keep only the two species being compared (dd is reassigned here).
 dd <- filter(iris, Species %in% c("setosa", "versicolor"))
 ```
 $\mu_0$ = mean `Sepal.Length` for setosa
 $\mu_1$ = mean `Sepal.Length` for versicolor
 $$ H_0 : \mu_0 = \mu_1, \quad H_1 : \mu_0 \neq \mu_1 $$
 ```{r}
 # Welch two-sample t-test (unequal variances). var.equal = FALSE is the
 # default; it is spelled out to make the choice of test explicit.
 t.test(Sepal.Length ~ Species, data = dd, var.equal = FALSE)
 ```
 p-value \< 0.05, reject H_0, accept H_1. This indicates a statistically significant difference in mean Sepal.Length between setosa and versicolor.
--- a/HW3.Rmd
+++ b/HW3.Rmd
@@ -0,0 +1,109 @@
 ---
 title: "Assignment 3"
 subtitle: "STAT3373"
 author: "Isaac Shoebottom"
 date: "Oct 2nd, 2025"
 output:
  pdf_document: default
  html_document:
    df_print: paged
 ---
 ```{r message=FALSE, warning=FALSE}
 library(tidyverse)
 library(broom)
 library(car)
 library(effectsize)
 library(knitr)
 library(kableExtra)
 ```
 ```{r}
 # Tomato yields (kg) for four fertilizers, five observations each.
 fertilizer_data <- data.frame(
   yield = c(
     4.2, 4.5, 4.1, 4.8, 4.3, # Fertilizer A
     5.1, 5.3, 4.9, 5.2, 5.0, # Fertilizer B
     3.8, 4.0, 3.6, 4.2, 3.9, # Fertilizer C
     5.5, 5.8, 5.4, 5.7, 5.6  # Fertilizer D
   ),
   fertilizer = factor(rep(LETTERS[1:4], each = 5))
 )
 # Show the raw data as a formatted table.
 kable_styling(
   kable(fertilizer_data, caption = "Tomato Yield Data"),
   bootstrap_options = c("striped", "hover")
 )
 # Per-fertilizer mean, standard deviation and sample size.
 summarise(
   group_by(fertilizer_data, fertilizer),
   mean = mean(yield),
   sd = sd(yield),
   n = n()
 )
 ```
 # Question 1
 ## a)
 ```{r}
 # Boxplot of yield per fertilizer with the raw observations overlaid.
 # Jitter is horizontal only (height = 0). Leaving height unset makes
 # geom_jitter() also apply its default vertical jitter (40% of the data
 # resolution), which would move points away from their true yield values.
 ggplot(fertilizer_data, aes(x = fertilizer, y = yield)) +
   geom_boxplot(fill = "lightblue") +
   geom_jitter(width = 0.1, height = 0) +
   labs(title = "Tomato Yield by Fertilizer",
        y = "Yield (kg)",
        x = "Fertilizer")
 ```
 Analysis:
 -   Fertilizer D appears to produce the highest yields.
 -   Fertilizer C appears lowest.
 -   Variability is similar across groups.
 ## b)
 ```{r}
 # One-way ANOVA of yield on fertilizer; summary() prints the ANOVA table.
 fertilizer_aov <- aov(formula = yield ~ fertilizer, data = fertilizer_data)
 summary(fertilizer_aov)
 ```
 p-value \< 0.05, so we reject H_0: there is a statistically significant difference in mean yield among the fertilizers.
 # Question 2
 ## a)
 ```{r}
 # Scores for three teaching methods, seven observations each; the rows of
 # scores are listed in the same order as the method labels.
 teaching_data <- tibble(
   score = c(
     78, 82, 75, 80, 77, 83, 79, # Traditional
     85, 88, 84, 87, 86, 90, 83, # Interactive
     81, 79, 83, 85, 82, 78, 80  # Online
   ),
   method = factor(rep(c("Traditional", "Interactive", "Online"), each = 7))
 )
 ```
 ## b)
 ```{r}
 # One-way ANOVA of score on teaching method; summary() prints the ANOVA table.
 teaching_aov <- aov(formula = score ~ method, data = teaching_data)
 summary(teaching_aov)
 ```
 p-value \< 0.05, so we reject H_0: mean scores differ significantly among the teaching methods.
 # Question 3
 ```{r}
 # Strength measurements for four suppliers, six observations each.
 supplier_data <- tibble(
   strength = c(
     245, 250, 248, 252, 249, 247, # Supplier 1
     240, 238, 242, 241, 239, 243, # Supplier 2
     255, 258, 254, 257, 256, 253, # Supplier 3
     248, 246, 250, 249, 247, 251  # Supplier 4
   ),
   supplier = factor(paste0("Supplier ", rep(1:4, each = 6)))
 )
 # One-way ANOVA of strength on supplier; summary() prints the ANOVA table.
 supplier_aov <- aov(formula = strength ~ supplier, data = supplier_data)
 summary(supplier_aov)
 ```
 p-value \< 0.01, so we reject H_0: mean strength differs significantly among the suppliers.