Data overview

Summary

library(renv)
renv::load()
renv::restore() # Install missing librairies.
renv::status() # Check the project state.

library(targets)
library(here)
tar_config_set(
  store = here::here("_targets")
)

library(ggplot2)
set_theme(theme_minimal())
library(dplyr)

Sea survey data

Here is the map of the sampling station for the different surveys.

tar_read(plot_station)

Sea environmental data

Here is the kind of environment data we have for the sea.

env <- tar_read(environment_data)
head(env)
    X         x        y  chl_mean nppv_mean  o2_mean    o2_cv o2_perc_25
1   7 -4.805556 48.50000 1.0947770  38.60499 104.9755 1.708866   103.3277
2   8 -4.777778 48.50000 1.1134708  39.24829 105.0184 1.758806   103.3193
3 185 -4.833333 48.47222 1.1021804  38.82361 104.9885 1.723767   103.3259
4 186 -4.805556 48.47222 1.1298747  39.73487 105.0475 1.792115   103.3152
5 187 -4.777778 48.47222 1.1297860  39.75322 105.0497 1.794766   103.3144
6 362 -4.888889 48.44444 0.9154485  32.93176 104.6185 1.296982   103.3873
  temp_mean  temp_cv temp_perc_90 sali_mean   sali_cv sali_tresh_10
1  14.40799 4.996541     15.06966  34.92360 0.8238475             0
2  14.45692 5.114160     15.13361  34.91428 0.8358042             0
3  14.42153 5.027339     15.08706  34.92090 0.8266643             0
4  14.48728 5.183352     15.17265  34.90825 0.8421609             0
5  14.49050 5.192138     15.17704  34.90770 0.8432401             0
6  14.01894 4.085622     14.56534  34.99931 0.7355736             0
  sali_perc_90 wave_mean bathymetry substrat QmMAnomalie_jan_avr year
1     35.24728   Niveau3  -33.13334 Graviers                   0 1993
2     35.24393   Niveau3  -16.31439 Graviers                   0 1993
3     35.24614   Niveau3  -49.58667 Graviers                   0 1993
4     35.24137   Niveau3  -31.59556 Graviers                   0 1993
5     35.24128   Niveau3  -14.81751 Graviers                   0 1993
6     35.27700   Niveau3  -37.97334 Graviers                   0 1993

We can have a look at the evolution of average temperature with years.

env |>
  group_by(year) |>
  summarise(annual_temperature = mean(temp_mean), .groups = "drop") |>
  ggplot(aes(x = year, y = annual_temperature)) +
  geom_point() +
  geom_line() +
  labs(x = "Year", y = "Mean annual temperature")

However, this is not very informative because we do not account for the spatialization of the data. A better way, to investigate the data is to plot yearly map of the different environmental variables. For example, below we plot the distribution of mean oxygen level, chorophyl concentration, and temperature.

library(tidyr)
library(sf)
library(rnaturalearth)
library(rnaturalearthhires)

coastline <- ne_coastline(scale = "large", returnclass = "sf")

var_list <- c("o2_mean", "nppv_mean", "temp_mean")
year_list <- c(2019, 2020, 2021, 2022, 2023)

env_long <- env |>
  pivot_longer(
    cols = c(
      chl_mean, nppv_mean, o2_mean, o2_cv,
      o2_perc_25, temp_mean, temp_cv, temp_perc_90, sali_mean
    ),
    names_to = "variable",
    values_to = "value"
  ) |>
  group_by(variable) |>
  filter(year %in% year_list & variable %in% var_list) |>
  mutate(value_scaled = scale(value)) |>
  ungroup()

ggplot(env_long, aes(x = x, y = y, fill = value_scaled)) +
  geom_tile() +
  geom_sf(data = coastline, inherit.aes = FALSE, linewidth = 0.3) +
  facet_grid(variable ~ year) +
  scale_fill_viridis_c() +
  coord_sf(xlim = range(env$x), ylim = range(env$y))

We can also plot the salinity for a given year, say 2023. We use a log scale, as salinity changes a lot in the estuary.

env |>
  mutate(log_salinity = log10(sali_mean)) |>
  filter(year == 2023) |>
  ggplot(aes(x = x, y = y, fill = log_salinity)) +
  geom_tile() +
  geom_sf(data = coastline, inherit.aes = FALSE, linewidth = 0.3) +
  scale_fill_viridis_c() +
  coord_sf(xlim = range(env$x), ylim = range(env$y))

We see that different environmental variables seem correlated. We can have a look at them more exhaustively with pairwise scatterplot.

cols <- c(
  "chl_mean", "nppv_mean", "o2_mean", "temp_mean", "sali_mean",
  "bathymetry"
)
year_vals <- c(2000, 2010, 2020)
GGally::ggpairs(
  env |> mutate(year = as.factor(year)) |> filter(year %in% year_vals),
  columns = cols,
  aes(colour = year, alpha = 0.1)
)

First, we see that we have some strong correlations between variables, such as, between oxygen and temperature. Interestingly, there are also changes between the correlation between the variables. In particular, we see that in the recent years, there is now a very strong correlation between chlorophyll and net primary productivity. This seems to be due to an explosion in primary productivity values in recent years. This is probably due to a change in the way the productivity is computed, but let’s further investigate.

year_list <- c(2019, 2020, 2021, 2022, 2023)

ggplot(
  env |> filter(year %in% year_list),
  aes(x = x, y = y, fill = nppv_mean)
) +
  geom_tile() +
  geom_sf(data = coastline, inherit.aes = FALSE, linewidth = 0.3) +
  facet_wrap(~year) +
  scale_fill_viridis_c() +
  coord_sf(xlim = c(-1.5,-0.5), ylim = c(44.8, 45.8))

We see indeed an explosion of the net primary productivity in the Gironde and Loire estuaries.

For comparison let’s plot cholorophyll values.

ggplot(
  env |> filter(year %in% year_list),
  aes(x = x, y = y, fill = chl_mean)
) +
  geom_tile() +
  geom_sf(data = coastline, inherit.aes = FALSE, linewidth = 0.3) +
  facet_wrap(~year) +
  scale_fill_viridis_c() +
  coord_sf(xlim = c(-1.5,-0.5), ylim = c(44.8, 45.8))

Food web structure metrics

Next, we investigate the structure of local food webs. We focus on food web richness, connectance and trophic length.

foodweb <- tar_read(local_foodwebs)
head(foodweb, n = 10)
# A tibble: 10 × 11
   trait    foodweb  log_trophic_richness log_species_richness connectance
   <chr>    <list>                  <dbl>                <dbl>       <dbl>
 1 61836630 <dbl[…]>                 3.04                 2.83       0.170
 2 61832572 <dbl[…]>                 3.09                 2.83       0.209
 3 61835462 <dbl[…]>                 3.04                 2.89       0.143
 4 61832074 <dbl[…]>                 2.94                 2.77       0.158
 5 61836249 <dbl[…]>                 3.00                 2.56       0.232
 6 61837171 <dbl[…]>                 2.48                 2.40       0.25 
 7 61837503 <dbl[…]>                 3.04                 2.77       0.175
 8 61835105 <dbl[…]>                 3.04                 2.83       0.197
 9 61833454 <dbl[…]>                 2.89                 2.71       0.148
10 61835688 <dbl[…]>                 2.83                 2.64       0.211
# ℹ 6 more variables: trophic_length <dbl>, year <int>, month <int>,
#   longitude <dbl>, latitude <dbl>, survey <chr>
year_list <- c(2000, 2005, 2010, 2015, 2020)
foodweb_long <- foodweb |>
  pivot_longer(
    cols = c(log_species_richness, connectance, trophic_length),
    names_to = "variable",
    values_to = "value"
  ) |>
  filter(year %in% year_list)

vars <- unique(foodweb_long$variable)

plots <- purrr::map(vars, function(var) {
  foodweb_long |>
    filter(variable == var) |>
    ggplot(aes(x = longitude, y = latitude, color = value)) +
    geom_point(size = 2.5) +
    geom_sf(data = coastline, inherit.aes = FALSE, linewidth = 0.3) +
    facet_wrap(~year, nrow = 1) +
    scale_color_viridis_c(name = var) +
    coord_sf(
      xlim = range(foodweb_long$longitude),
      ylim = range(foodweb_long$latitude)
    ) +
    labs(x = NULL, y = NULL)
})

patchwork::wrap_plots(plots, ncol = 1)

We can also have a look at the correlations between the different structure metrics.

GGally::ggpairs(
  foodweb |>
    filter(year %in% year_vals) |>
    mutate(year = as.factor(year)),
  columns = c("log_trophic_richness", "connectance", "trophic_length"),
  aes(colour = year)
)

We see a strong correlation between richness and connectance and trophic length, which is respectively negtive and positive.

Matching food web and environmental data

We match food webs with the closest environmental data available. We also record the distance between the foodweb and the environmental data, so we can later filter out food webs for which we don’t environmental data available (e.g. if the distance is greater than 20 km).

fw_with_env <- tar_read(fw_with_env)

Next, we want to check that the distance to environmental data is consistent with our expectation, that is, the distance is low for the atlantic coast, and increases as we go further into the Manche.

ggplot(fw_with_env, aes(x = longitude, y = latitude, color = dist_to_env)) +
  geom_point(size = 2.5) +
  geom_sf(data = coastline, inherit.aes = FALSE, linewidth = 0.3) +
  scale_color_viridis_c() +
  coord_sf(
    xlim = range(fw_with_env$longitude),
    ylim = range(fw_with_env$latitude)
  ) +
  labs(x = NULL, y = NULL)

dist_max <- 20

Let’s say, that we want to keep only food webs for which with have environmental variables closer than 20 km, and map them.

ggplot(
  fw_with_env |> filter(dist_to_env < dist_max),
  aes(x = longitude, y = latitude, color = dist_to_env)
) +
  geom_point(size = 2.5) +
  geom_sf(data = coastline, inherit.aes = FALSE, linewidth = 0.3) +
  scale_color_viridis_c() +
  coord_sf(
    xlim = range(fw_with_env$longitude),
    ylim = range(fw_with_env$latitude)
  ) +
  labs(x = NULL, y = NULL)

For now we will work with these food webs.

fw_with_env <- fw_with_env |>
  filter(dist_to_env < dist_max)

We can do a RDA to investigate how environmental variables affect food web structure metrics.

response <- fw_with_env |>
  select(
    log_species_richness, log_trophic_richness, connectance, trophic_length
  )

predictor <- fw_with_env |>
  select(
    chl_mean, nppv_mean, o2_mean, temp_mean,
    sali_mean, bathymetry, temp_cv, o2_cv, sali_cv
  ) |>
  scale()

cor(predictor, use = "complete.obs") |> round(2)
           chl_mean nppv_mean o2_mean temp_mean sali_mean bathymetry temp_cv
chl_mean       1.00     -0.01    0.79      0.43     -0.76       0.18    0.09
nppv_mean     -0.01      1.00   -0.12      0.24      0.08       0.00   -0.07
o2_mean        0.79     -0.12    1.00      0.41     -0.55       0.16    0.17
temp_mean      0.43      0.24    0.41      1.00     -0.59       0.42    0.10
sali_mean     -0.76      0.08   -0.55     -0.59      1.00      -0.27   -0.05
bathymetry     0.18      0.00    0.16      0.42     -0.27       1.00    0.02
temp_cv        0.09     -0.07    0.17      0.10     -0.05       0.02    1.00
o2_cv          0.77     -0.31    0.72      0.04     -0.49       0.02    0.19
sali_cv        0.64     -0.16    0.51      0.49     -0.94       0.21    0.08
           o2_cv sali_cv
chl_mean    0.77    0.64
nppv_mean  -0.31   -0.16
o2_mean     0.72    0.51
temp_mean   0.04    0.49
sali_mean  -0.49   -0.94
bathymetry  0.02    0.21
temp_cv     0.19    0.08
o2_cv       1.00    0.47
sali_cv     0.47    1.00
m <- vegan::rda(response, predictor)
summary(m)

Call:
rda(X = response, Y = predictor) 

Partitioning of variance:
              Inertia Proportion
Total         0.42860    1.00000
Constrained   0.02029    0.04733
Unconstrained 0.40831    0.95267

Eigenvalues, and their contribution to the variance 

Importance of components:
                         RDA1      RDA2      RDA3      RDA4    PC1     PC2
Eigenvalue            0.01932 0.0009216 3.418e-05 1.007e-05 0.3664 0.03919
Proportion Explained  0.04508 0.0021502 7.976e-05 2.351e-05 0.8549 0.09143
Cumulative Proportion 0.04508 0.0472284 4.731e-02 4.733e-02 0.9022 0.99367
                           PC3       PC4
Eigenvalue            0.002334 0.0003773
Proportion Explained  0.005445 0.0008804
Cumulative Proportion 0.999120 1.0000000

Accumulated constrained eigenvalues
Importance of components:
                         RDA1      RDA2      RDA3      RDA4
Eigenvalue            0.01932 0.0009216 3.418e-05 1.007e-05
Proportion Explained  0.95239 0.0454280 1.685e-03 4.966e-04
Cumulative Proportion 0.95239 0.9978183 9.995e-01 1.000e+00
ggvegan::autoplot(m)

Most of the variance is still unexplained by environmental driver. It may mean two things. First, we are missing some of the key environmental driver. Second, there is a high stochaticity (which can be expected). Interestingly, we see that the salinity is closely related to the richness.

We can have use variance partitioning method to assess what fraction of the variance is explained by environment, spatial and temporal effects respectively.

variance_partitioning <- vegan::varpart(
  response, predictor,
  fw_with_env |> select(longitude, latitude),
  fw_with_env |> select(year)
)

plot(
  variance_partitioning,
  digits = 2,
  Xnames = c("Environment", "Space", "Time")
)

The continuum: map sea and river data

load(file = here::here("data", "river", "station.rda"))
web_in_web_loc_path <- here::here("data", "river", "web_in_web.rda")
load(file = web_in_web_loc_path)
plot(riveratlas_station["ord_stra"])

com_metrics_st <- com_metrics |>
  left_join(op_st_filtered$op, by = "opcod") |>
  group_by(station) |>
  summarise(
    begin = min(lubridate::year(date)),
    end = max(lubridate::year(date)),
    nsampling = n(),
    med_richness = median(richness),
    med_biomass = median(biomass),
    med_nind = median(nind),
    .groups = "drop"
  )

net_metrics_st <- network_metrics |>
  group_by(station) |>
  summarise(
    med_connectance = median(connectance),
    med_max_tlvl = median(max_troph_lvl),
    med_nbnode = median(nbnode),
    med_w_avg_tlvl = median(weighted_avg_obs_tlvl),
    .groups = "drop"
  )

st_to_plot <- op_st_filtered$station |>
  select(station, libelle_sandre, town) |>
  left_join(com_metrics_st, by = "station") |>
  left_join(net_metrics_st, by = "station") |>
  mutate(
    longitude = st_coordinates(geometry)[, 1],
    latitude = st_coordinates(geometry)[, 2]
  ) |>
  st_drop_geometry()

Is there a continuum?

river <- st_to_plot |>
  select(longitude, latitude, med_richness, med_connectance, med_max_tlvl) |>
  rename(connectance = med_connectance, trophic_length = med_max_tlvl) |>
  mutate(log_species_richness = log(med_richness), type = "river") |>
  select(-med_richness)

sea <- foodweb |>
  select(
    longitude, latitude, connectance, log_species_richness, trophic_length
  ) |>
  mutate(type = "sea")

river_sea <- rbind(river, sea)

river_sea |>
  ggplot(aes(x = longitude, y = latitude, color = type)) +
  geom_point(size = 2.5) +
  geom_sf(data = coastline, inherit.aes = FALSE, linewidth = 0.3) +
  coord_sf(
    xlim = range(river_sea$longitude),
    ylim = range(river_sea$latitude)
  ) +
  labs(x = NULL, y = NULL)

Species richness.

river_sea |>
  ggplot(aes(x = longitude, y = latitude, color = log_species_richness)) +
  geom_point(size = 2.5) +
  geom_sf(data = coastline, inherit.aes = FALSE, linewidth = 0.3) +
  scale_color_viridis_c() +
  coord_sf(
    xlim = range(river_sea$longitude),
    ylim = range(river_sea$latitude)
  ) +
  labs(x = NULL, y = NULL)

Connectance.

river_sea |>
  ggplot(aes(x = longitude, y = latitude, color = connectance)) +
  geom_point(size = 2.5) +
  geom_sf(data = coastline, inherit.aes = FALSE, linewidth = 0.3) +
  scale_color_viridis_c() +
  coord_sf(
    xlim = range(river_sea$longitude),
    ylim = range(river_sea$latitude)
  ) +
  labs(x = NULL, y = NULL)

Trophic length.

river_sea |>
  ggplot(aes(x = longitude, y = latitude, color = trophic_length)) +
  geom_point(size = 2.5) +
  geom_sf(data = coastline, inherit.aes = FALSE, linewidth = 0.3) +
  scale_color_viridis_c() +
  coord_sf(
    xlim = range(river_sea$longitude),
    ylim = range(river_sea$latitude)
  ) +
  labs(x = NULL, y = NULL)

river_sea |>
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326) |>
  mapview::mapview(
    zcol = c("type", "log_species_richness", "connectance", "trophic_length")
  )