Add HW2 and HW3

This commit is contained in:
2025-12-25 21:11:55 -04:00
parent effc930a7d
commit 7ae72064a4
2 changed files with 191 additions and 0 deletions

82
HW2.Rmd
View File

@@ -9,8 +9,90 @@ output:
df_print: paged
---
```{r}
library(tidyverse)
# beaver2 ships with R's datasets package; its temp and activ columns are
# the ones analysed in Question 1
dd <- beaver2
```
# Question 1
$\mu_0$ = mean temperature when `activ` = 0
$\mu_1$ = mean temperature when `activ` = 1
$$ H_0 : \mu_0 = \mu_1, \quad H_1 : \mu_0 \neq \mu_1 $$
```{r}
# Two-sample t-test of temp between the two activ groups; t.test() defaults
# to var.equal = FALSE, i.e. the Welch test
t.test(temp ~ activ, data = dd)
```
We reject $H_0$ in favour of $H_1$ and conclude that the mean temperatures differ by activity level.
Now manually:
```{r}
# Per-group sample size, mean and variance of temp: one row per activ level
summary_stats <- summarise(
  group_by(dd, activ),
  n = n(),
  mean_temp = mean(temp),
  var_temp = var(temp)
)
summary_stats
```
Compute Standard Error and t stat
```{r}
# Group means (suffix 0 = activ 0, suffix 1 = activ 1)
x0 <- with(summary_stats, mean_temp[activ == 0])
x1 <- with(summary_stats, mean_temp[activ == 1])
# NOTE: s0 and s1 hold the sample variances (var_temp), not standard deviations
s0 <- with(summary_stats, var_temp[activ == 0])
s1 <- with(summary_stats, var_temp[activ == 1])
# Group sample sizes
n0 <- with(summary_stats, n[activ == 0])
n1 <- with(summary_stats, n[activ == 1])
# Standard error of the difference in means, using unpooled variances
SE <- sqrt(s0 / n0 + s1 / n1)
# t statistic for the difference (activ 0 minus activ 1)
t_stat <- (x0 - x1) / SE
t_stat
```
Compute DF
```{r}
# Welch-Satterthwaite approximation to the degrees of freedom, built from the
# per-group variance-over-n terms; local() keeps the helper names out of the
# session
df <- local({
  term0 <- s0 / n0
  term1 <- s1 / n1
  (term0 + term1)^2 / (term0^2 / (n0 - 1) + term1^2 / (n1 - 1))
})
df
```
Compute p-value
```{r}
# Two-sided p-value: twice the lower-tail t probability at -|t_stat|
p_value <- 2 * pt(q = -abs(t_stat), df = df)
p_value
```
The manually computed p-value leads to the same conclusion as `t.test`: we reject $H_0$ in favour of $H_1$ and conclude that the mean temperatures differ by activity level.
# Question 2
```{r}
# Keep only the rows for the two species being compared
dd <- filter(iris, Species %in% c("setosa", "versicolor"))
```
$\mu_0$ = mean `Sepal.Length` for setosa
$\mu_1$ = mean `Sepal.Length` for versicolor
$$ H_0 : \mu_0 = \mu_1, \quad H_1 : \mu_0 \neq \mu_1 $$
```{r}
# Two-sample t-test of Sepal.Length between the two species kept in dd;
# t.test() defaults to var.equal = FALSE, i.e. the Welch test
t.test(Sepal.Length ~ Species, data = dd)
```
p-value \< 0.05, reject H_0, accept H_1. This indicates a statistically significant difference in mean Sepal.Length between setosa and versicolor.

109
HW3.Rmd Normal file
View File

@@ -0,0 +1,109 @@
---
title: "Assignment 3"
subtitle: "STAT3373"
author: "Isaac Shoebottom"
date: "Oct 2nd, 2025"
output:
  pdf_document: default
  html_document:
    df_print: paged
---
```{r message=FALSE, warning=FALSE}
library(tidyverse)
library(broom)
library(car)
library(effectsize)
library(knitr)
library(kableExtra)
```
```{r}
# Tomato yields for four fertilizers, five observations each, in long format
# (one row per observation)
fertilizer_data <- data.frame(
  yield = c(
    4.2, 4.5, 4.1, 4.8, 4.3, # Fertilizer A
    5.1, 5.3, 4.9, 5.2, 5.0, # Fertilizer B
    3.8, 4.0, 3.6, 4.2, 3.9, # Fertilizer C
    5.5, 5.8, 5.4, 5.7, 5.6  # Fertilizer D
  ),
  fertilizer = factor(rep(LETTERS[1:4], each = 5))
)
# Render the raw data as a styled table
# NOTE(review): bootstrap_options look like HTML styling options; confirm the
# table renders as intended in the PDF output
kable_styling(
  kable(fertilizer_data, caption = "Tomato Yield Data"),
  bootstrap_options = c("striped", "hover")
)
# Per-fertilizer mean, standard deviation and sample size of yield
summarise(
  group_by(fertilizer_data, fertilizer),
  mean = mean(yield),
  sd = sd(yield),
  n = n()
)
```
# Question 1
## a)
```{r}
# Boxplot of yield per fertilizer with the individual observations overlaid;
# the points are jittered horizontally so they do not overlap
ggplot(data = fertilizer_data, mapping = aes(x = fertilizer, y = yield)) +
  geom_boxplot(fill = "lightblue") +
  geom_jitter(width = 0.1) +
  labs(
    title = "Tomato Yield by Fertilizer",
    x = "Fertilizer",
    y = "Yield (kg)"
  )
```
Analysis:
- Fertilizer D appears to produce the highest yields.
- Fertilizer C appears lowest.
- Variability is similar across groups.
## b)
```{r}
# One-way ANOVA of yield on fertilizer
fertilizer_aov <- aov(yield ~ fertilizer, data = fertilizer_data)
# Print the ANOVA table (F statistic and p-value)
summary(fertilizer_aov)
```
p-value \< 0.05, so we reject $H_0$: there is a statistically significant difference in mean yield among the fertilizers.
# Question 2
## a)
```{r}
# Scores for three teaching methods, seven observations each, in long format.
# The rows follow the order of the rep() labels below; factor() sorts the
# levels alphabetically.
teaching_data <- tibble(
  score = c(
    78, 82, 75, 80, 77, 83, 79, # Traditional
    85, 88, 84, 87, 86, 90, 83, # Interactive
    81, 79, 83, 85, 82, 78, 80  # Online
  ),
  method = factor(
    rep(c("Traditional", "Interactive", "Online"), each = 7)
  )
)
```
## b)
```{r}
# One-way ANOVA of score on teaching method
teaching_aov <- aov(score ~ method, data = teaching_data)
# Print the ANOVA table (F statistic and p-value)
summary(teaching_aov)
```
p-value \< 0.05, so we reject $H_0$: there are statistically significant differences in mean score among the teaching methods.
# Question 3
```{r}
# Strength measurements for four suppliers, six observations each, in long
# format (one row per observation)
supplier_data <- tibble(
  strength = c(
    245, 250, 248, 252, 249, 247, # Supplier 1
    240, 238, 242, 241, 239, 243, # Supplier 2
    255, 258, 254, 257, 256, 253, # Supplier 3
    248, 246, 250, 249, 247, 251  # Supplier 4
  ),
  supplier = factor(
    rep(paste("Supplier", 1:4), each = 6)
  )
)
# One-way ANOVA of strength on supplier
supplier_aov <- aov(strength ~ supplier, data = supplier_data)
# Print the ANOVA table (F statistic and p-value)
summary(supplier_aov)
```
p-value \< 0.01, so we reject $H_0$: there are statistically significant differences in mean strength among the suppliers.