library(renv)
renv::load()
renv::restore() # Install missing librairies.
renv::status() # Check the project state.
library(targets)
library(here)
tar_config_set(
store = here::here("_targets")
)
library(ggplot2)
set_theme(theme_minimal())
library(dplyr)Data overview
Summary
Sea survey data
Here is the map of the sampling station for the different surveys.
tar_read(plot_station)Sea environmental data
Here is the kind of environment data we have for the sea.
env <- tar_read(environment_data)
head(env) X x y chl_mean nppv_mean o2_mean o2_cv o2_perc_25
1 7 -4.805556 48.50000 1.0947770 38.60499 104.9755 1.708866 103.3277
2 8 -4.777778 48.50000 1.1134708 39.24829 105.0184 1.758806 103.3193
3 185 -4.833333 48.47222 1.1021804 38.82361 104.9885 1.723767 103.3259
4 186 -4.805556 48.47222 1.1298747 39.73487 105.0475 1.792115 103.3152
5 187 -4.777778 48.47222 1.1297860 39.75322 105.0497 1.794766 103.3144
6 362 -4.888889 48.44444 0.9154485 32.93176 104.6185 1.296982 103.3873
temp_mean temp_cv temp_perc_90 sali_mean sali_cv sali_tresh_10
1 14.40799 4.996541 15.06966 34.92360 0.8238475 0
2 14.45692 5.114160 15.13361 34.91428 0.8358042 0
3 14.42153 5.027339 15.08706 34.92090 0.8266643 0
4 14.48728 5.183352 15.17265 34.90825 0.8421609 0
5 14.49050 5.192138 15.17704 34.90770 0.8432401 0
6 14.01894 4.085622 14.56534 34.99931 0.7355736 0
sali_perc_90 wave_mean bathymetry substrat QmMAnomalie_jan_avr year
1 35.24728 Niveau3 -33.13334 Graviers 0 1993
2 35.24393 Niveau3 -16.31439 Graviers 0 1993
3 35.24614 Niveau3 -49.58667 Graviers 0 1993
4 35.24137 Niveau3 -31.59556 Graviers 0 1993
5 35.24128 Niveau3 -14.81751 Graviers 0 1993
6 35.27700 Niveau3 -37.97334 Graviers 0 1993
We can have a look at the evolution of average temperature with years.
env |>
group_by(year) |>
summarise(annual_temperature = mean(temp_mean), .groups = "drop") |>
ggplot(aes(x = year, y = annual_temperature)) +
geom_point() +
geom_line() +
labs(x = "Year", y = "Mean annual temperature")However, this is not very informative because we do not account for the spatialization of the data. A better way, to investigate the data is to plot yearly map of the different environmental variables. For example, below we plot the distribution of mean oxygen level, chorophyl concentration, and temperature.
library(tidyr)
library(sf)
library(rnaturalearth)
library(rnaturalearthhires)
coastline <- ne_coastline(scale = "large", returnclass = "sf")
var_list <- c("o2_mean", "nppv_mean", "temp_mean")
year_list <- c(2019, 2020, 2021, 2022, 2023)
env_long <- env |>
pivot_longer(
cols = c(
chl_mean, nppv_mean, o2_mean, o2_cv,
o2_perc_25, temp_mean, temp_cv, temp_perc_90, sali_mean
),
names_to = "variable",
values_to = "value"
) |>
group_by(variable) |>
filter(year %in% year_list & variable %in% var_list) |>
mutate(value_scaled = scale(value)) |>
ungroup()
ggplot(env_long, aes(x = x, y = y, fill = value_scaled)) +
geom_tile() +
geom_sf(data = coastline, inherit.aes = FALSE, linewidth = 0.3) +
facet_grid(variable ~ year) +
scale_fill_viridis_c() +
coord_sf(xlim = range(env$x), ylim = range(env$y))We can also plot the salinity for a given year, say 2023. We use a log scale, as salinity changes a lot in the estuary.
env |>
mutate(log_salinity = log10(sali_mean)) |>
filter(year == 2023) |>
ggplot(aes(x = x, y = y, fill = log_salinity)) +
geom_tile() +
geom_sf(data = coastline, inherit.aes = FALSE, linewidth = 0.3) +
scale_fill_viridis_c() +
coord_sf(xlim = range(env$x), ylim = range(env$y))We see that different environmental variables seem correlated. We can have a look at them more exhaustively with pairwise scatterplot.
cols <- c(
"chl_mean", "nppv_mean", "o2_mean", "temp_mean", "sali_mean",
"bathymetry"
)
year_vals <- c(2000, 2010, 2020)
GGally::ggpairs(
env |> mutate(year = as.factor(year)) |> filter(year %in% year_vals),
columns = cols,
aes(colour = year, alpha = 0.1)
)First, we see that we have some strong correlations between variables, such as, between oxygen and temperature. Interestingly, there are also changes between the correlation between the variables. In particular, we see that in the recent years, there is now a very strong correlation between chlorophyll and net primary productivity. This seems to be due to an explosion in primary productivity values in recent years. This is probably due to a change in the way the productivity is computed, but let’s further investigate.
year_list <- c(2019, 2020, 2021, 2022, 2023)
ggplot(
env |> filter(year %in% year_list),
aes(x = x, y = y, fill = nppv_mean)
) +
geom_tile() +
geom_sf(data = coastline, inherit.aes = FALSE, linewidth = 0.3) +
facet_wrap(~year) +
scale_fill_viridis_c() +
coord_sf(xlim = c(-1.5,-0.5), ylim = c(44.8, 45.8))We see indeed an explosion of the net primary productivity in the Gironde and Loire estuaries.
For comparison let’s plot cholorophyll values.
ggplot(
env |> filter(year %in% year_list),
aes(x = x, y = y, fill = chl_mean)
) +
geom_tile() +
geom_sf(data = coastline, inherit.aes = FALSE, linewidth = 0.3) +
facet_wrap(~year) +
scale_fill_viridis_c() +
coord_sf(xlim = c(-1.5,-0.5), ylim = c(44.8, 45.8))Food web structure metrics
Next, we investigate the structure of local food webs. We focus on food web richness, connectance and trophic length.
foodweb <- tar_read(local_foodwebs)
head(foodweb, n = 10)# A tibble: 10 × 11
trait foodweb log_trophic_richness log_species_richness connectance
<chr> <list> <dbl> <dbl> <dbl>
1 61836630 <dbl[…]> 3.04 2.83 0.170
2 61832572 <dbl[…]> 3.09 2.83 0.209
3 61835462 <dbl[…]> 3.04 2.89 0.143
4 61832074 <dbl[…]> 2.94 2.77 0.158
5 61836249 <dbl[…]> 3.00 2.56 0.232
6 61837171 <dbl[…]> 2.48 2.40 0.25
7 61837503 <dbl[…]> 3.04 2.77 0.175
8 61835105 <dbl[…]> 3.04 2.83 0.197
9 61833454 <dbl[…]> 2.89 2.71 0.148
10 61835688 <dbl[…]> 2.83 2.64 0.211
# ℹ 6 more variables: trophic_length <dbl>, year <int>, month <int>,
# longitude <dbl>, latitude <dbl>, survey <chr>
year_list <- c(2000, 2005, 2010, 2015, 2020)
foodweb_long <- foodweb |>
pivot_longer(
cols = c(log_species_richness, connectance, trophic_length),
names_to = "variable",
values_to = "value"
) |>
filter(year %in% year_list)
vars <- unique(foodweb_long$variable)
plots <- purrr::map(vars, function(var) {
foodweb_long |>
filter(variable == var) |>
ggplot(aes(x = longitude, y = latitude, color = value)) +
geom_point(size = 2.5) +
geom_sf(data = coastline, inherit.aes = FALSE, linewidth = 0.3) +
facet_wrap(~year, nrow = 1) +
scale_color_viridis_c(name = var) +
coord_sf(
xlim = range(foodweb_long$longitude),
ylim = range(foodweb_long$latitude)
) +
labs(x = NULL, y = NULL)
})
patchwork::wrap_plots(plots, ncol = 1)We can also have a look at the correlations between the different structure metrics.
GGally::ggpairs(
foodweb |>
filter(year %in% year_vals) |>
mutate(year = as.factor(year)),
columns = c("log_trophic_richness", "connectance", "trophic_length"),
aes(colour = year)
)We see a strong correlation between richness and connectance and trophic length, which is respectively negtive and positive.
Matching food web and environmental data
We match food webs with the closest environmental data available. We also record the distance between the foodweb and the environmental data, so we can later filter out food webs for which we don’t environmental data available (e.g. if the distance is greater than 20 km).
fw_with_env <- tar_read(fw_with_env)Next, we want to check that the distance to environmental data is consistent with our expectation, that is, the distance is low for the atlantic coast, and increases as we go further into the Manche.
ggplot(fw_with_env, aes(x = longitude, y = latitude, color = dist_to_env)) +
geom_point(size = 2.5) +
geom_sf(data = coastline, inherit.aes = FALSE, linewidth = 0.3) +
scale_color_viridis_c() +
coord_sf(
xlim = range(fw_with_env$longitude),
ylim = range(fw_with_env$latitude)
) +
labs(x = NULL, y = NULL)dist_max <- 20Let’s say, that we want to keep only food webs for which with have environmental variables closer than 20 km, and map them.
ggplot(
fw_with_env |> filter(dist_to_env < dist_max),
aes(x = longitude, y = latitude, color = dist_to_env)
) +
geom_point(size = 2.5) +
geom_sf(data = coastline, inherit.aes = FALSE, linewidth = 0.3) +
scale_color_viridis_c() +
coord_sf(
xlim = range(fw_with_env$longitude),
ylim = range(fw_with_env$latitude)
) +
labs(x = NULL, y = NULL)For now we will work with these food webs.
fw_with_env <- fw_with_env |>
filter(dist_to_env < dist_max)We can do a RDA to investigate how environmental variables affect food web structure metrics.
response <- fw_with_env |>
select(
log_species_richness, log_trophic_richness, connectance, trophic_length
)
predictor <- fw_with_env |>
select(
chl_mean, nppv_mean, o2_mean, temp_mean,
sali_mean, bathymetry, temp_cv, o2_cv, sali_cv
) |>
scale()
cor(predictor, use = "complete.obs") |> round(2) chl_mean nppv_mean o2_mean temp_mean sali_mean bathymetry temp_cv
chl_mean 1.00 -0.01 0.79 0.43 -0.76 0.18 0.09
nppv_mean -0.01 1.00 -0.12 0.24 0.08 0.00 -0.07
o2_mean 0.79 -0.12 1.00 0.41 -0.55 0.16 0.17
temp_mean 0.43 0.24 0.41 1.00 -0.59 0.42 0.10
sali_mean -0.76 0.08 -0.55 -0.59 1.00 -0.27 -0.05
bathymetry 0.18 0.00 0.16 0.42 -0.27 1.00 0.02
temp_cv 0.09 -0.07 0.17 0.10 -0.05 0.02 1.00
o2_cv 0.77 -0.31 0.72 0.04 -0.49 0.02 0.19
sali_cv 0.64 -0.16 0.51 0.49 -0.94 0.21 0.08
o2_cv sali_cv
chl_mean 0.77 0.64
nppv_mean -0.31 -0.16
o2_mean 0.72 0.51
temp_mean 0.04 0.49
sali_mean -0.49 -0.94
bathymetry 0.02 0.21
temp_cv 0.19 0.08
o2_cv 1.00 0.47
sali_cv 0.47 1.00
m <- vegan::rda(response, predictor)
summary(m)
Call:
rda(X = response, Y = predictor)
Partitioning of variance:
Inertia Proportion
Total 0.42860 1.00000
Constrained 0.02029 0.04733
Unconstrained 0.40831 0.95267
Eigenvalues, and their contribution to the variance
Importance of components:
RDA1 RDA2 RDA3 RDA4 PC1 PC2
Eigenvalue 0.01932 0.0009216 3.418e-05 1.007e-05 0.3664 0.03919
Proportion Explained 0.04508 0.0021502 7.976e-05 2.351e-05 0.8549 0.09143
Cumulative Proportion 0.04508 0.0472284 4.731e-02 4.733e-02 0.9022 0.99367
PC3 PC4
Eigenvalue 0.002334 0.0003773
Proportion Explained 0.005445 0.0008804
Cumulative Proportion 0.999120 1.0000000
Accumulated constrained eigenvalues
Importance of components:
RDA1 RDA2 RDA3 RDA4
Eigenvalue 0.01932 0.0009216 3.418e-05 1.007e-05
Proportion Explained 0.95239 0.0454280 1.685e-03 4.966e-04
Cumulative Proportion 0.95239 0.9978183 9.995e-01 1.000e+00
ggvegan::autoplot(m)Most of the variance is still unexplained by environmental driver. It may mean two things. First, we are missing some of the key environmental driver. Second, there is a high stochaticity (which can be expected). Interestingly, we see that the salinity is closely related to the richness.
We can have use variance partitioning method to assess what fraction of the variance is explained by environment, spatial and temporal effects respectively.
variance_partitioning <- vegan::varpart(
response, predictor,
fw_with_env |> select(longitude, latitude),
fw_with_env |> select(year)
)
plot(
variance_partitioning,
digits = 2,
Xnames = c("Environment", "Space", "Time")
)The continuum: map sea and river data
load(file = here::here("data", "river", "station.rda"))
web_in_web_loc_path <- here::here("data", "river", "web_in_web.rda")
load(file = web_in_web_loc_path)
plot(riveratlas_station["ord_stra"])com_metrics_st <- com_metrics |>
left_join(op_st_filtered$op, by = "opcod") |>
group_by(station) |>
summarise(
begin = min(lubridate::year(date)),
end = max(lubridate::year(date)),
nsampling = n(),
med_richness = median(richness),
med_biomass = median(biomass),
med_nind = median(nind),
.groups = "drop"
)
net_metrics_st <- network_metrics |>
group_by(station) |>
summarise(
med_connectance = median(connectance),
med_max_tlvl = median(max_troph_lvl),
med_nbnode = median(nbnode),
med_w_avg_tlvl = median(weighted_avg_obs_tlvl),
.groups = "drop"
)
st_to_plot <- op_st_filtered$station |>
select(station, libelle_sandre, town) |>
left_join(com_metrics_st, by = "station") |>
left_join(net_metrics_st, by = "station") |>
mutate(
longitude = st_coordinates(geometry)[, 1],
latitude = st_coordinates(geometry)[, 2]
) |>
st_drop_geometry()Is there a continuum?
river <- st_to_plot |>
select(longitude, latitude, med_richness, med_connectance, med_max_tlvl) |>
rename(connectance = med_connectance, trophic_length = med_max_tlvl) |>
mutate(log_species_richness = log(med_richness), type = "river") |>
select(-med_richness)
sea <- foodweb |>
select(
longitude, latitude, connectance, log_species_richness, trophic_length
) |>
mutate(type = "sea")
river_sea <- rbind(river, sea)
river_sea |>
ggplot(aes(x = longitude, y = latitude, color = type)) +
geom_point(size = 2.5) +
geom_sf(data = coastline, inherit.aes = FALSE, linewidth = 0.3) +
coord_sf(
xlim = range(river_sea$longitude),
ylim = range(river_sea$latitude)
) +
labs(x = NULL, y = NULL)Species richness.
river_sea |>
ggplot(aes(x = longitude, y = latitude, color = log_species_richness)) +
geom_point(size = 2.5) +
geom_sf(data = coastline, inherit.aes = FALSE, linewidth = 0.3) +
scale_color_viridis_c() +
coord_sf(
xlim = range(river_sea$longitude),
ylim = range(river_sea$latitude)
) +
labs(x = NULL, y = NULL)Connectance.
river_sea |>
ggplot(aes(x = longitude, y = latitude, color = connectance)) +
geom_point(size = 2.5) +
geom_sf(data = coastline, inherit.aes = FALSE, linewidth = 0.3) +
scale_color_viridis_c() +
coord_sf(
xlim = range(river_sea$longitude),
ylim = range(river_sea$latitude)
) +
labs(x = NULL, y = NULL)Trophic length.
river_sea |>
ggplot(aes(x = longitude, y = latitude, color = trophic_length)) +
geom_point(size = 2.5) +
geom_sf(data = coastline, inherit.aes = FALSE, linewidth = 0.3) +
scale_color_viridis_c() +
coord_sf(
xlim = range(river_sea$longitude),
ylim = range(river_sea$latitude)
) +
labs(x = NULL, y = NULL)river_sea |>
st_as_sf(coords = c("longitude", "latitude"), crs = 4326) |>
mapview::mapview(
zcol = c("type", "log_species_richness", "connectance", "trophic_length")
)