Convergence of Carbon and Mercury Cycle By-products Across Boreal Rivers of Québec, Canada

Authors
Affiliation

Audrey Campeau

Université de Montréal

M. Amyot, P. del Giorgio, C. Fink-Mercier, M. Wauthy, D.E. Ponton, and J-F. Lapierre

Published

December 17, 2024

1 Visualisation of the database

👈 see code here
# Importer libraries
library(tidyverse) 
library(plotly) 
library(readxl) 
library(DT) # for interactive datatables

library(leaflet) #for interactive maps
library(htmltools)

#Open dataset
data <- readRDS(file = "Outputs/Data/qc_subset.rds")


# Define Color Ramp and theme
colorramp <- c("#FC4E07",  "#E69F00" ,"#00AFBB") # Define Color ramp
theme_set(theme_bw(base_size = 16))

#Present the dataset with interactivitiy
datatable(data, options = list(pageLength = 5), filter="top") %>%  
  formatStyle(columns = colnames(.$x$data), `font-size` = '10px')

2 Map of the river sampling locations

👈 see code here
map=leaflet(data = filter(data, !is.na(DHg_Mean))  %>% 
              mutate_if(is.numeric, round, digits = 2)) %>%
  addTiles() %>%
  addProviderTiles("OpenTopoMap")%>% #Esri.WorldImagery
  
  addCircleMarkers(
    ~GPS_W, ~GPS_N, 
    radius=~5 , 
    color= c(rep(colorramp[2], 49 ), #HM
             rep(colorramp[3],365), #RR
             rep(colorramp[1],123)), #EI
    #label = ~htmlEscape(RiverName), 
    label = ~lapply(seq_len(nrow(data)), function(i) {
      htmltools::HTML(sprintf(
        "<strong>River Name:</strong> %s<br/>
         <strong>Position in River:</strong> %s<br/>
         <strong>DHg (ng/L):</strong> %s<br/>
         <strong>DOC (mg/L):</strong> %s",
        RiverName[i],
        Position_in_River[i],
        DHg_Mean[i], 
        DOC_Mean[i]
        ))# Replace with actual variable name
    }),
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto"
    ),
    stroke = F,
    fillOpacity = 1
  ) %>%
  addLegend("bottomright", 
            colors = colorramp,
            labels = c("RR", "HM","EI"),
            title = "Region")
map

3 Lolipop plot of correlation coefficients

👈 see code here
# Extract Correlation Parameters


library(dplyr)
library(broom)
library(tidyr)

# Function to perform correlation test and return tidy results
cor_test_tidy <- function(data, x, y, region = NULL) {
  cor_test <- cor.test(data[[x]], data[[y]], method = "spearman", exact = FALSE)
  tidy_result <- tidy(cor_test) %>%
    mutate(
      x_var = x,
      y_var = y,
      region = region
    ) %>%
    select(region, x_var, y_var, estimate, p.value)
  return(tidy_result)
}

# Function to extract correlation parameters
extract_correlations <- function(data, x_var, y_var, region_col = "Region", region_order = c("EI", "RR", "HM", "Overall")) {
  correlation_table <- bind_rows(
    data %>%
      group_by(!!sym(region_col)) %>%
      group_modify(~cor_test_tidy(.x, x_var, y_var, .y[[region_col]])),
    cor_test_tidy(data, x_var, y_var, "Overall")
  ) %>%
    arrange(factor(region, levels = region_order))
  
  return(correlation_table)
}

# Create a table for each correlations :
 correlation_table_1 <- extract_correlations(data, "DHg_Mean", "DOC_Mean")
 correlation_table_2 <- extract_correlations(data, "DHg_Mean", "C1")
 correlation_table_3 <- extract_correlations(data, "DHg_Mean", "C4")
 correlation_table_4 <- extract_correlations(data, "DMeHg_DHg_ratio", "DOC_Mean")
 correlation_table_5 <- extract_correlations(data, "DMeHg_DHg_ratio", "C1")
 correlation_table_6 <- extract_correlations(data, "DMeHg_DHg_ratio", "C4")
 
 correlation_table_7 <- extract_correlations(data, "DMeHg_DHg_DMeHG_ratio","Cgas_DOC_ratio")
 correlation_table_8 <- extract_correlations(data, "DMeHg_Mean", "CH4_plus_MOX")
 correlation_table_9 <- extract_correlations(data, "DMeHg_Mean", "CH4_mgCL")
 
 #Combine 
 correlation_table_all= rbind(correlation_table_1, correlation_table_2,correlation_table_3,
                              correlation_table_4,correlation_table_5,correlation_table_6,
                              correlation_table_7,correlation_table_8, correlation_table_9)
 
# Display the table
#datatable(correlation_table_all)

# Create a label combining x and y variables
correlation_table_all_2 <- correlation_table_all %>%
  mutate(label = paste(x_var, y_var, sep = " vs "))

# Create size categories based on p-value
correlation_table_all_2 <- correlation_table_all_2 %>%
  mutate(p_value_size_cat = case_when(
    p.value < 0.001 ~ "<0.001",
    p.value < 0.01 ~ "<0.01",
    p.value < 0.05 ~ "<0.05",
    TRUE ~ "N.S."
  ))

# Create a separate dataframe for overall estimates
overall_estimates <- correlation_table_all_2 %>%
  filter(region == "Overall") %>% #Keep only overall estimates
  arrange(estimate) %>% #arrange it in ascendent order
  mutate(order = row_number()) #apply this order to the row number

# Join the order back to the main dataframe
correlation_table_all_2 <- correlation_table_all_2 %>% 
  left_join(overall_estimates %>% select(label, order), by = "label") #combine the ordered estimates with the complete correlation table based on the labels


# Reorder the region factor
correlation_table_all_2 <- correlation_table_all_2 %>%
  mutate(region = factor(region, levels = c("EI", "HM", "RR", "Overall"))) # make sure the region order is EI, HM, RR, Overall

# Create a new column with formatted labels
correlation_table_all_2 <- correlation_table_all_2 %>%
  mutate(label_formatted = case_when(
    label == "DHg_Mean vs C1" ~ "[Hg] vs C1(%)",
    label == "DHg_Mean vs C4" ~ "[Hg] vs C4(%)",
    label == "DHg_Mean vs DOC_Mean" ~ "[Hg] vs [DOC]",
    label == "DMeHg_DHg_DMeHG_ratio vs Cgas_DOC_ratio" ~ "MeHg:Hg vs Cgas:DOC",
    label == "DMeHg_DHg_ratio vs C1" ~ "MeHg:Hg vs C1(%)",
    label == "DMeHg_DHg_ratio vs C4" ~ "MeHg:Hg vs C4(%)",
    label == "DMeHg_DHg_ratio vs DOC_Mean" ~ "MeHg:Hg vs [DOC]",
    label == "DMeHg_Mean vs CH4_mgCL" ~ "[MeHg] vs [CH4]",
    label == "DMeHg_Mean vs CH4_plus_MOX" ~ "[MeHg] vs [CH4 + MOX]",
    TRUE ~ label  # Keep original label if not specified above
  ))

# Create the lollipop plot
lollipop= ggplot(correlation_table_all_2, 
                  aes(x = estimate, y = reorder(label_formatted, order))) +
  
                  geom_segment(aes(x = 0, xend = estimate, yend = label_formatted), color = "black") +
                  geom_point(aes(fill = region, size = p_value_size_cat), alpha = 0.7, shape = 21) +
                  geom_point(aes(size = p_value_size_cat), color = "black", shape = 21) +
                  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  
                  scale_fill_manual(values = c(colorramp, "black")) +
                  scale_size_discrete(range = c(6, 2)) +
                  scale_x_continuous(limits=c(-0.8,0.8),breaks = seq(-1, 1, 0.2)) +
  
                  theme_minimal(base_size = 16) +
                  theme(
                    axis.text.x = element_text(color = "black"),  # Also changed x-axis text to black for consistency
                    axis.text.y = element_text(angle = 0, hjust = 1, color = "black"),
                        legend.position = "top",
                        plot.title = element_text(size = 20)) +
                  
                  labs(x = "Spearman rho", y = "", fill = "Region", size = "p-value")+
                  guides(
                    fill = "none",  # Increase size for fill legend
                    size = guide_legend(override.aes = list(fill = "black"))  # Make size legend points black
                  )


ggplotly(lollipop, tooltip = c("x","size"))