99 lines
1.8 KiB
Plaintext
99 lines
1.8 KiB
Plaintext
---
|
|
title: "Assignment 2"
|
|
subtitle: "STAT3373"
|
|
author: "Isaac Shoebottom"
|
|
date: "Sept 25th, 2025"
|
|
output:
  html_document:
    df_print: paged
  pdf_document: default
|
|
---
|
|
|
|
```{r}
|
|
library(tidyverse)
|
|
# Work on the beaver2 data set from the datasets package; the chunks below
# refer to it as `dd`.
dd <- datasets::beaver2
|
|
```
|
|
|
|
# Question 1
|
|
|
|
$\mu_0$ = mean temperature when `activ = 0`
|
|
|
|
$\mu_1$ = mean temperature when `activ = 1`
|
|
|
|
$$ H_0 : \mu_0 = \mu_1, \quad H_1 : \mu_0 \neq \mu_1 $$
|
|
|
|
```{r}
|
|
# Two-sided Welch two-sample t-test of temp by activ (variances not pooled).
t.test(
  temp ~ activ,
  data = dd,
  alternative = "two.sided",
  var.equal = FALSE
)
|
|
```
|
|
|
|
We reject $H_0$ in favour of $H_1$ and conclude that the mean temperatures differ by activity level.
|
|
|
|
Now manually:
|
|
|
|
```{r}
|
|
# Per-group sample size, mean and variance of temp, one row per activ value.
by_activity <- group_by(dd, activ)

summary_stats <- summarise(
  by_activity,
  n = n(),
  mean_temp = mean(temp),
  var_temp = var(temp)
)

summary_stats
|
|
```
|
|
|
|
Compute the standard error and the t statistic:
|
|
|
|
```{r}
|
|
# Pull each group's summary values out of summary_stats.
# Suffix 0 = row with activ == 0, suffix 1 = row with activ == 1.
is_group0 <- summary_stats$activ == 0
is_group1 <- summary_stats$activ == 1

x0 <- summary_stats$mean_temp[is_group0]
x1 <- summary_stats$mean_temp[is_group1]

# s0 and s1 are sample variances (var_temp), not standard deviations.
s0 <- summary_stats$var_temp[is_group0]
s1 <- summary_stats$var_temp[is_group1]

n0 <- summary_stats$n[is_group0]
n1 <- summary_stats$n[is_group1]

# Standard error of the difference in means, without pooling the variances.
SE <- sqrt(s0 / n0 + s1 / n1)

# Test statistic for the difference x0 - x1.
t_stat <- (x0 - x1) / SE
t_stat
|
|
```
|
|
|
|
Compute the degrees of freedom (Welch-Satterthwaite approximation):
|
|
|
|
```{r}
|
|
# Welch-Satterthwaite approximation to the degrees of freedom.
# Each term below is one group's variance divided by its sample size.
var_over_n0 <- s0 / n0
var_over_n1 <- s1 / n1

df <- (var_over_n0 + var_over_n1)^2 /
  (var_over_n0^2 / (n0 - 1) + var_over_n1^2 / (n1 - 1))

df
|
|
```
|
|
|
|
Compute the two-sided p-value:
|
|
|
|
```{r}
|
|
# Two-sided p-value: twice the lower-tail probability at -|t_stat| under a
# t distribution with `df` degrees of freedom.
lower_tail <- pt(-abs(t_stat), df = df)
p_value <- 2 * lower_tail
p_value
|
|
```
|
|
|
|
This p-value leads to the same conclusion that `t.test` reaches: we reject $H_0$ in favour of $H_1$ and conclude that the mean temperatures differ by activity level.
|
|
|
|
# Question 2
|
|
|
|
```{r}
|
|
# Keep only the two species being compared. filter() leaves the unused
# "virginica" level on the Species factor, so droplevels() removes it and
# Species becomes a genuine two-level grouping variable.
dd <- iris %>%
  filter(Species %in% c("setosa", "versicolor")) %>%
  droplevels()
|
|
```
|
|
|
|
$\mu_0$ = mean `Sepal.Length` for setosa
|
|
|
|
$\mu_1$ = mean `Sepal.Length` for versicolor
|
|
|
|
$$ H_0 : \mu_0 = \mu_1, \quad H_1 : \mu_0 \neq \mu_1 $$
|
|
|
|
```{r}
|
|
# Two-sided Welch two-sample t-test of Sepal.Length by Species
# (variances not pooled).
t.test(
  Sepal.Length ~ Species,
  data = dd,
  alternative = "two.sided",
  var.equal = FALSE
)
|
|
```
|
|
|
|
Since the p-value is \< 0.05, we reject $H_0$ in favour of $H_1$. This indicates a statistically significant difference in mean `Sepal.Length` between setosa and versicolor.
|