Add HW4 and HW5
This commit is contained in:
186
HW4.Rmd
Normal file
186
HW4.Rmd
Normal file
@@ -0,0 +1,186 @@
|
|||||||
|
---
title: "Assignment 3"
subtitle: "STAT3373"
author: "Isaac Shoebottom"
date: "Oct 16th, 2025"
output:
  pdf_document: default
  html_document:
    df_print: paged
---
|
||||||
|
|
||||||
|
```{r message=FALSE, warning=FALSE}
|
||||||
|
library(tidyverse)
|
||||||
|
library(knitr)
|
||||||
|
```
|
||||||
|
|
||||||
|
# Question 1
|
||||||
|
|
||||||
|
## a)
|
||||||
|
|
||||||
|
```{r}
|
||||||
|
# Create the dataset in wide form: one row per farm (the block) and one
# column per fertilizer (the treatment). Values are yields in bushels per acre.
data <- tibble(
  Farm = factor(1:4),
  Fert1 = c(48, 45, 52, 44),
  Fert2 = c(55, 50, 58, 49),
  Fert3 = c(52, 49, 55, 47)
)

# Convert to long format: one row per farm-fertilizer observation, which is
# the layout aov() needs. Fertilizer is made a factor so it is modelled as a
# categorical treatment rather than as text.
long_data <- data %>%
  pivot_longer(
    cols = starts_with("Fert"),
    names_to = "Fertilizer",
    values_to = "Yield"
  ) %>%
  mutate(Fertilizer = factor(Fertilizer))

# Show the reshaped data as a captioned table in the knitted report.
kable(long_data, caption = "Yield Data (Bushels per Acre)")
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
## b)
|
||||||
|
|
||||||
|
Model: $$Y_{ij} = \mu + \tau_i + \beta_j + \varepsilon_{ij}$$
|
||||||
|
|
||||||
|
```{r}
|
||||||
|
# Fit the additive model Yield = mu + Fertilizer + Farm + error, with
# Fertilizer as the treatment term and Farm as the blocking term.
anova_model <- aov(Yield ~ Fertilizer + Farm, data = long_data)

# Keep the ANOVA table in a variable and print it in the report.
anova_table <- summary(anova_model)
anova_table
|
||||||
|
```
|
||||||
|
|
||||||
|
Conclusions:
|
||||||
|
|
||||||
|
- Fertilizer effect is significant (p \< 0.05)
|
||||||
|
|
||||||
|
- Farm (block) effect is also significant
|
||||||
|
|
||||||
|
## c)
|
||||||
|
|
||||||
|
```{r}
|
||||||
|
# Tukey HSD pairwise comparisons for the Fertilizer term only; the Farm
# (block) term is left out of the comparisons.
tukey_results <- TukeyHSD(anova_model, which = "Fertilizer")
tukey_results
|
||||||
|
```
|
||||||
|
|
||||||
|
Results:
|
||||||
|
|
||||||
|
- Fertilizer 2 produces the highest yields
|
||||||
|
|
||||||
|
- All fertilizer pairs differ significantly
|
||||||
|
|
||||||
|
- Ordering of mean yields: Fert 2 \> Fert 3 \> Fert 1
|
||||||
|
|
||||||
|
Final Conclusion (alpha = 0.05)
|
||||||
|
|
||||||
|
- There is strong statistical evidence that fertilizer type affects yield.
|
||||||
|
|
||||||
|
- Blocking by farm was appropriate and reduced error variability.
|
||||||
|
|
||||||
|
- Fertilizer 2 is the most effective option based on yield.
|
||||||
|
|
||||||
|
# Question 2
|
||||||
|
|
||||||
|
## a)
|
||||||
|
```{r}
|
||||||
|
# Drug trial data in long form: 5 patients (blocks) x 3 drugs (treatments).
# Rows are ordered patient by patient, and within each patient in the order
# A, B, C, so response_time must be listed in that same order.
drug_data <- data.frame(
  patient = factor(rep(1:5, each = 3)),
  drug = factor(rep(c("A", "B", "C"), times = 5)),
  response_time = c(
    12, 10, 15, # Patient 1
    14, 11, 16, # Patient 2
    10, 8, 13,  # Patient 3
    13, 10, 14, # Patient 4
    11, 9, 14   # Patient 5
  )
)
|
||||||
|
|
||||||
|
# Show the drug trial data as a captioned table in the knitted report.
knitr::kable(drug_data, caption = "Drug Trial Response Times (seconds)")
|
||||||
|
```
|
||||||
|
|
||||||
|
## b)
|
||||||
|
|
||||||
|
Model: $$Y_{ij} = \mu + \tau_i + \beta_j + \varepsilon_{ij}$$
|
||||||
|
|
||||||
|
```{r}
|
||||||
|
# Fit the additive model response_time = mu + drug + patient + error, with
# drug as the treatment term and patient as the blocking term.
# Note: this reuses the name anova_model, replacing the Question 1 fit.
anova_model <- aov(response_time ~ drug + patient, data = drug_data)
summary(anova_model)
|
||||||
|
```
|
||||||
|
|
||||||
|
Decision (alpha = 0.05):
|
||||||
|
|
||||||
|
- Drug effect is significant
|
||||||
|
|
||||||
|
- Patient (block) effect is significant
|
||||||
|
|
||||||
|
## c)
|
||||||
|
Residual Diagnostics
|
||||||
|
```{r}
|
||||||
|
# Draw the two diagnostic plots side by side. par() returns the previous
# settings, so keep them and restore exactly those afterwards instead of
# assuming the prior layout was a single panel.
old_par <- par(mfrow = c(1, 2))
plot(anova_model, which = 1) # Residuals vs Fitted
plot(anova_model, which = 2) # Normal Q-Q
par(old_par)
|
||||||
|
```
|
||||||
|
|
||||||
|
Formal Tests
|
||||||
|
```{r}
|
||||||
|
# Normality of residuals: Shapiro-Wilk test on the residuals of the fitted
# model.
shapiro.test(residuals(anova_model))

# Homogeneity of variance: Bartlett's test comparing the variance of the raw
# response times across the three drug groups (the patient block is not
# part of this formula).
bartlett.test(response_time ~ drug, data = drug_data)
|
||||||
|
```
|
||||||
|
|
||||||
|
Results:
|
||||||
|
|
||||||
|
- Residuals are approximately normally distributed
|
||||||
|
|
||||||
|
- Variances across drug groups are homogeneous
|
||||||
|
|
||||||
|
## d)
|
||||||
|
Multiple Comparisons
|
||||||
|
```{r}
|
||||||
|
# Tukey HSD pairwise comparisons for the drug term only; the patient
# (block) term is left out of the comparisons.
tukey_results <- TukeyHSD(anova_model, which = "drug")
tukey_results
|
||||||
|
```
|
||||||
|
|
||||||
|
Results:
|
||||||
|
|
||||||
|
- All drug pairs differ significantly
|
||||||
|
|
||||||
|
- Ordering of mean response times: Drug B \< Drug A \< Drug C
|
||||||
|
|
||||||
|
## e)
|
||||||
|
Mean Response Times by Drug
|
||||||
|
```{r}
|
||||||
|
# Bar chart of the mean response time for each drug: average within drug
# first, then plot one bar per drug.
drug_data %>%
  group_by(drug) %>%
  summarise(mean_time = mean(response_time)) %>%
  ggplot(aes(x = drug, y = mean_time)) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Mean Response Time by Drug",
    x = "Drug",
    y = "Mean Response Time (seconds)"
  ) +
  theme_minimal()
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
Boxplot by Drug
|
||||||
|
```{r}
|
||||||
|
# Boxplot of the individual response times, one box per drug, to show the
# spread within each drug as well as its centre.
ggplot(drug_data, aes(x = drug, y = response_time)) +
  geom_boxplot(fill = "lightgray") +
  labs(
    title = "Response Time Distribution by Drug",
    x = "Drug",
    y = "Response Time (seconds)"
  ) +
  theme_minimal()
|
||||||
|
```
|
||||||
|
|
||||||
|
## f)
|
||||||
|
Conclusion:
|
||||||
|
|
||||||
|
At the 5% significance level, there is strong evidence that drug formulation affects patient response time. Blocking by patient was effective and significantly reduced unexplained variability. Post-hoc analysis using Tukey’s HSD showed that all three drugs differ significantly, with Drug B producing the fastest (best) response times, followed by Drug A, and then Drug C.
|
||||||
127
HW5.Rmd
Normal file
127
HW5.Rmd
Normal file
@@ -0,0 +1,127 @@
|
|||||||
|
---
title: "Assignment 3"
subtitle: "STAT3373"
author: "Isaac Shoebottom"
date: "Oct 16th, 2025"
output:
  pdf_document: default
  html_document:
    df_print: paged
---
|
||||||
|
|
||||||
|
```{r message=FALSE, warning=FALSE}
|
||||||
|
library(tidyverse)
|
||||||
|
```
|
||||||
|
|
||||||
|
# Question 1
|
||||||
|
|
||||||
|
## a)
|
||||||
|
|
||||||
|
```{r}
|
||||||
|
# Plant growth data entered row by row: one observation per line, giving the
# sunlight level, soil type, treatment label (A, B or C) and measured growth.
# There are 27 observations: 3 sunlight levels x 3 soil types x 3 treatments.
latin_data <- tribble(
  ~sunlight,     ~soil,   ~treatment, ~growth,
  "Sunny",       "Sandy", "C",        45,
  "Sunny",       "Loamy", "A",        52,
  "Sunny",       "Clay",  "B",        48,
  "Sunny",       "Sandy", "B",        43,
  "Sunny",       "Loamy", "C",        50,
  "Sunny",       "Clay",  "A",        49,
  "Sunny",       "Sandy", "A",        47,
  "Sunny",       "Loamy", "B",        51,
  "Sunny",       "Clay",  "C",        46,

  "Partial Sun", "Sandy", "A",        41,
  "Partial Sun", "Loamy", "B",        44,
  "Partial Sun", "Clay",  "C",        38,
  "Partial Sun", "Sandy", "B",        39,
  "Partial Sun", "Loamy", "C",        42,
  "Partial Sun", "Clay",  "A",        40,
  "Partial Sun", "Sandy", "C",        40,
  "Partial Sun", "Loamy", "A",        43,
  "Partial Sun", "Clay",  "B",        41,

  "Shade",       "Sandy", "B",        32,
  "Shade",       "Loamy", "C",        35,
  "Shade",       "Clay",  "A",        33,
  "Shade",       "Sandy", "C",        31,
  "Shade",       "Loamy", "A",        36,
  "Shade",       "Clay",  "B",        34,
  "Shade",       "Sandy", "A",        30,
  "Shade",       "Loamy", "B",        33,
  "Shade",       "Clay",  "C",        32
)

# Print the data in the knitted report.
latin_data
|
||||||
|
```
|
||||||
|
|
||||||
|
## b)
|
||||||
|
|
||||||
|
|
||||||
|
```{r}
|
||||||
|
# Mean growth for each treatment (A, B, C), averaged over all sunlight
# levels and soil types.
treatment_means <- latin_data %>%
  group_by(treatment) %>%
  summarise(mean_growth = mean(growth))

treatment_means
|
||||||
|
```
|
||||||
|
|
||||||
|
## c)
|
||||||
|
|
||||||
|
Mean Growth by Sunlight Level
|
||||||
|
```{r}
|
||||||
|
# Mean growth for each sunlight level, averaged over all soil types and
# treatments.
sunlight_means <- latin_data %>%
  group_by(sunlight) %>%
  summarise(mean_growth = mean(growth))

sunlight_means
|
||||||
|
```
|
||||||
|
|
||||||
|
Mean Growth by Soil Type
|
||||||
|
```{r}
|
||||||
|
# Mean growth for each soil type, averaged over all sunlight levels and
# treatments.
soil_means <- latin_data %>%
  group_by(soil) %>%
  summarise(mean_growth = mean(growth))

soil_means
|
||||||
|
```
|
||||||
|
|
||||||
|
## d)
|
||||||
|
|
||||||
|
From the descriptive statistics:
|
||||||
|
|
||||||
|
Sunlight effects: Growth is highest under Sunny, moderate under Partial Sun, and lowest under Shade. This suggests sunlight has a strong positive effect on growth.
|
||||||
|
|
||||||
|
Soil effects: Loamy soil consistently produces higher growth than Sandy or Clay, indicating soil type is an important blocking factor.
|
||||||
|
|
||||||
|
Treatment effects: If the treatment means differ noticeably:
|
||||||
|
|
||||||
|
- The watering schedule with the highest mean appears most effective.
|
||||||
|
|
||||||
|
- Smaller differences suggest weaker treatment effects relative to blocking factors.
|
||||||
|
|
||||||
|
- Because the Latin square controls for sunlight and soil, observed treatment differences are less confounded.
|
||||||
|
|
||||||
|
## e)
|
||||||
|
A Latin square ANOVA partitions variation into:
|
||||||
|
|
||||||
|
- Sunlight (row effect)
|
||||||
|
|
||||||
|
- Soil type (column effect)
|
||||||
|
|
||||||
|
- Treatment effect
|
||||||
|
|
||||||
|
- Error
|
||||||
|
|
||||||
|
Expected Findings:
|
||||||
|
|
||||||
|
- Sunlight effect: Likely highly significant, given the strong gradient from Sunny to Shade.
|
||||||
|
|
||||||
|
- Soil effect: Likely significant, especially if Loamy soil dominates.
|
||||||
|
|
||||||
|
- Treatment effect:

    - Possibly significant if Daily watering shows consistently higher growth.

    - Could be marginal if differences among watering schedules are small compared to sunlight and soil.
|
||||||
|
|
||||||
|
- Error variance: Expected to be relatively small due to strong blocking.
|
||||||
Reference in New Issue
Block a user