library(tidyverse)
library(here)
library(janitor)
# Packages for cluster analysis:
library(NbClust)
library(cluster)
library(factoextra)
library(dendextend)
library(ggdendro)
To access data, html and Rmd/qmd files:
Load packages
Introduction:
- To perform hierarchical clustering by site, we’ll begin by making a data frame that has a single summary row per site (e.g. based on means from all observations at that site), then we will calculate the euclidean distance before performing complete linkage agglomerative hierarchical clustering.
Read in data
<- read_csv(here("posts", "2021-03-07-hierarchical-clustering","sbc_lter_registered_stream_chemistry.csv")) %>%
stream_chem clean_names() %>%
na_if(-999.0) %>%
group_by(site_code) %>%
summarise(across(nh4_u_m:spec_cond_u_spercm, mean, na.rm = TRUE)) %>%
drop_na()
Rows: 19390 Columns: 12
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): site_code
dbl (10): nh4_uM, no3_uM, po4_uM, tdn_uM, tdp_uM, tpc_uM, tpn_uM, tpp_uM, t...
dttm (1): timestamp_local
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Scale the data
# scale the numeric variables
<- stream_chem %>%
stream_chem_scaled ::select(2:11) %>%
dplyrscale()
# make rownames the site code names from original data set
rownames(stream_chem_scaled) <- stream_chem$site_code
# Compute dissimilarity values (Euclidean distances):
<- dist(stream_chem_scaled, method = "euclidean")
stream_euc_distance
# Check out the output:
# View(euc_distance)
Perform hierarchical clustering by complete linkage with stats::hclust()
# Hierarchical clustering (complete linkage)
<- hclust(stream_euc_distance, method = "complete" )
stream_hc_complete
# Plot it (base plot):
plot(stream_hc_complete, cex = 0.6, hang = -1)
Convert to class: dendogram
<- as.dendrogram(stream_hc_complete) stream_dend_complete
Plot using ggdendrogram()
, a ggplot
wrapper:
# ggplot
ggdendrogram(stream_dend_complete,
rotate = TRUE)+
labs(x = "Santa Barbara area watershed site code") +
ylab(NULL)+
theme_bw() +
theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
Data & Metadata Source:
- SBC LTER: Stream chemistry in the Santa Barbara Coastal drainage area, ongoing since 2000 Creators: Santa Barbara Coastal LTER, & Melack, John M Citation: Santa Barbara Coastal LTER and J. Melack. 2019. SBC LTER: Land: Stream chemistry in the Santa Barbara Coastal drainage area, ongoing since 2000 ver 16. Environmental Data Initiative. https://doi.org/10.6073/pasta/67a558a24ceed9a0a5bf5e46ab841174.