library(renv)
renv::load()
renv::restore() # Install missing libraries.
renv::status() # Check the project state.
Explore coastal data
Summary
In this notebook we
- explore the coastal data of fish catches from different campagnes (Nurse, Pomet and Solper)
- extract general information about each campagne (sampling effort, trait location)
- compute how often the sizes of all caught fish are measured
0. Setup
Load dependencies.
# Load the packages used throughout the notebook.
library(here)
library(dplyr)
library(ggplot2)

# Use a minimal ggplot2 theme and viridis for the default continuous scales.
set_theme(theme_minimal())
options(
  ggplot2.continuous.colour = "viridis",
  ggplot2.continuous.fill = "viridis"
)

# Folder holding the raw sea data files, built with here().
data_folder <- here("data", "sea", "raw")
list.files(data_folder) [1] "Captures_Nurse_1980_2023.csv"
[2] "captures_SOLPER_2005_2011.csv"
[3] "fish_diet.csv"
[4] "poisson_pomet.csv"
[5] "poisson_tailles_pomet.csv"
[6] "POMET_RefTax_Sp.csv"
[7] "Reftax_Captures_NURSE_DCE_SUIVINOUR_SOLPER_2024.RData"
[8] "Strates_Nurse_1980_2023.csv"
[9] "strates_SOLPER_2005_2011.csv"
[10] "Tailles_Nurse_1980_2023.csv"
[11] "tailles_SOLPER_2005_2011.csv"
[12] "Traits_Nurse_1980_2023.csv"
[13] "traits_SOLPER_2005_2011.csv"
We see that we have multiple csv files. We have abundance and biomass data for the Atlantic (Nurse, Solper), Atlantic and Manche (POMET).
1. Pomet campagne
1.1. Clean the data
First let’s load files.
# Read the Pomet catch table (semicolon-separated). With the default encoding
# its accented characters come out as raw bytes (e.g. "S\xe8vre"), so the file
# is declared as Latin-1.
d.poisson <- read.csv(
  here(data_folder, "poisson_pomet.csv"),
  sep = ";",
  fileEncoding = "latin1"
)
head(d.poisson) ID_interne_passage ID_interne_prelevement ID_Zone
1 60733591 61839708 FRGT20 - Le Blavet
2 60736834 61836379 FRHC14 - Baie de Caen
3 60731145 61831377 FRHC10 - Baie des Veys
4 60729058 61837714 FRGT31 - La S\xe8vre Niortaise
5 60736468 61839934 FRGT28 - La Loire
6 60731828 61832476 FRGT28 - La Loire
Zone
1 Le Blavet - ME
2 Estuaire de l'Orne - ME
3 Baie des Veys :fond de baie estuarien et chenaux d'Isigny et de Carentan
4 La S\xe8vre Niortaise
5 La Loire
6 La Loire
Annee Mois Date Heure Trait Coord_Deb_xmin Coord_Deb_ymin
1 2012 5 11/05/2012 12:56:00 Trait n\xb0 3 -3,28684 47,77047
2 2018 10 08/10/2018 10:53:00 Trait n\xb0 12 -0,31704 49,1849
3 2018 5 13/05/2018 20:55:00 Trait n\xb0 13 -1,11569 49,337879
4 2009 5 15/05/2009 10:13:00 Trait n\xb0 4 -1,041966667 46,32115
5 2013 6 19/06/2013 12:44:00 Trait n\xb0 5 -1,38 47,29353333
6 2010 9 28/09/2010 15:25:00 Trait n\xb0 19 -1,512716667 47,217
Coord_Fin_xmin Coord_Fin_ymin Prof Engin_peche NomScient
1 -3,28449 47,77652 3,1 chalut a perche_1.46m Abramis
2 -0,32433 49,18178 3,7 chalut a perche_1.46m Abramis brama
3 -1,11738 49,346859 4,6 chalut a perche_1.46m Abramis brama
4 -1,031566667 46,31883333 2,6 chalut a perche_3m Abramis brama
5 -1,3666 47,3006 3,8 chalut a perche_3m Abramis brama
6 -1,49605 47,2212 3,42 chalut a perche_3m Abramis brama
Ecologique Trophique Position Nind_esp Pds_esp
1 FW IB D 1 1
2 FW I D 49 252
3 FW I D 1 <NA>
4 FW I D 53 480
5 FW I D 1 57
6 FW I D 1 37
Commentaire
1 Station : B4 b. Coef. de mar\xe9e : 66.0. Issu de la reprise POMET du 21/11/2019
2 Jusant. Station : Amont. Coef. de mar\xe9e : 98.0. Issu de la reprise POMET du 21/11/2019
3 Coef. de mar\xe9e : 81.0. Issu de la reprise POMET du 21/11/2019
4 Station : 12B. Coef. de mar\xe9e : 50.0. Issu de la reprise POMET du 21/11/2019
5 Coef. de mar\xe9e : 57.0. Issu de la reprise POMET du 21/11/2019
6 Coef. de mar\xe9e : 72.0. Issu de la reprise POMET du 21/11/2019
Let’s do a bit of cleaning. Remove irrelevant columns.
# Drop the comment and descriptive columns that are not used below.
d.poisson <- select(
  d.poisson,
  !c(Commentaire, Ecologique, Trophique, Engin_peche, Zone)
)
names(d.poisson) [1] "ID_interne_passage" "ID_interne_prelevement" "ID_Zone"
[4] "Annee" "Mois" "Date"
[7] "Heure" "Trait" "Coord_Deb_xmin"
[10] "Coord_Deb_ymin" "Coord_Fin_xmin" "Coord_Fin_ymin"
[13] "Prof" "NomScient" "Position"
[16] "Nind_esp" "Pds_esp"
Rename columns for clarity.
# Give the Pomet catch columns short English names (new_name = old_name).
d.poisson <- rename(
  d.poisson,
  zone = ID_Zone,
  year = Annee, month = Mois, date = Date, hour = Heure,
  trait = Trait,
  start_x = Coord_Deb_xmin, start_y = Coord_Deb_ymin,
  end_x = Coord_Fin_xmin, end_y = Coord_Fin_ymin,
  depth = Prof,
  species = NomScient,
  position = Position,
  abundance = Nind_esp,
  weight_batch = Pds_esp
)
head(d.poisson) ID_interne_passage ID_interne_prelevement zone year
1 60733591 61839708 FRGT20 - Le Blavet 2012
2 60736834 61836379 FRHC14 - Baie de Caen 2018
3 60731145 61831377 FRHC10 - Baie des Veys 2018
4 60729058 61837714 FRGT31 - La S\xe8vre Niortaise 2009
5 60736468 61839934 FRGT28 - La Loire 2013
6 60731828 61832476 FRGT28 - La Loire 2010
month date hour trait start_x start_y
1 5 11/05/2012 12:56:00 Trait n\xb0 3 -3,28684 47,77047
2 10 08/10/2018 10:53:00 Trait n\xb0 12 -0,31704 49,1849
3 5 13/05/2018 20:55:00 Trait n\xb0 13 -1,11569 49,337879
4 5 15/05/2009 10:13:00 Trait n\xb0 4 -1,041966667 46,32115
5 6 19/06/2013 12:44:00 Trait n\xb0 5 -1,38 47,29353333
6 9 28/09/2010 15:25:00 Trait n\xb0 19 -1,512716667 47,217
end_x end_y depth species position abundance weight_batch
1 -3,28449 47,77652 3,1 Abramis D 1 1
2 -0,32433 49,18178 3,7 Abramis brama D 49 252
3 -1,11738 49,346859 4,6 Abramis brama D 1 <NA>
4 -1,031566667 46,31883333 2,6 Abramis brama D 53 480
5 -1,3666 47,3006 3,8 Abramis brama D 1 57
6 -1,49605 47,2212 3,42 Abramis brama D 1 37
We see that the zone ID column also contains zone names which is not necessary.
unique(d.poisson$zone) [1] "FRGT20 - Le Blavet"
[2] "FRHC14 - Baie de Caen"
[3] "FRHC10 - Baie des Veys"
[4] "FRGT31 - La S\xe8vre Niortaise"
[5] "FRGT28 - La Loire"
[6] "FRHT01 - Estuaire de Seine amont Poses dulcaquicole"
[7] "FRGC01 - Baie du Mont-Saint-Michel"
[8] "FRHT08 - la Dives du barrage de Saint Samson a l'embouchure"
[9] "FRFT33 - Estuaire Fluvial Garonne Amont"
[10] "FRHT02 - Estuaire de Seine Moyen dulcaquicole"
[11] "FRHT07 - Risle Martime"
[12] "FRGT27 - La Vilaine"
[13] "FRFC02 - Pertuis Charentais"
[14] "FRFT31 - Estuaire Fluvial Isle"
[15] "FRHC16 - Le Havre Antifer"
[16] "FRGT05 - Le L\xe9guer"
[17] "FRGT04 - Le Jaudy"
[18] "FRGC11 - Baie de Morlaix"
[19] "FRFT32 - Estuaire Fluvial Dordogne"
[20] "FRGT03 - Le Trieux"
[21] "FRFT09 - Estuaire Gironde aval"
[22] "FRGT12 - L'Aulne"
[23] "FRGT18 - La La\xefta"
[24] "FRFT35 - Gironde amont"
[25] "FRGT21 - Rivi\xe8re d'Etel"
[26] "FRGT17 - La Belon"
[27] "FRGT13 - Le Goyen"
[28] "FRAC05 - La Warenne \xe0 Ault"
[29] "FRGT16 - L'Aven"
[30] "FRGT09 - L'Aber Benoit"
[31] "FRGT15 - L'Odet"
[32] "FRGT30 - Le Lay"
[33] "FRGT10 - L'Elorn"
[34] "FRGT19 - Le Scorff"
[35] "FRGT02 - Bassin maritime de la Rance"
[36] "FRGT23 - Rivi\xe8re d'Auray"
[37] "FRGT14 - Rivi\xe8re de Pont-l'Abb\xe9"
[38] "FRGT08 - L'Aber Wrac'h"
We want to keep only the ID.
# Keep only the first six characters of `zone`, which hold the zone ID.
d.poisson <- d.poisson |>
  mutate(zone = substr(zone, 1, 6))
unique(d.poisson$zone) [1] "FRGT20" "FRHC14" "FRHC10" "FRGT31" "FRGT28" "FRHT01" "FRGC01" "FRHT08"
[9] "FRFT33" "FRHT02" "FRHT07" "FRGT27" "FRFC02" "FRFT31" "FRHC16" "FRGT05"
[17] "FRGT04" "FRGC11" "FRFT32" "FRGT03" "FRFT09" "FRGT12" "FRGT18" "FRFT35"
[25] "FRGT21" "FRGT17" "FRGT13" "FRAC05" "FRGT16" "FRGT09" "FRGT15" "FRGT30"
[33] "FRGT10" "FRGT19" "FRGT02" "FRGT23" "FRGT14" "FRGT08"
Now let’s look at which species are more abundant on average.
# Rank species by their mean abundance per catch record and keep the top `n`.
n <- 10
d <- d.poisson |>
  group_by(species) |>
  summarise(abundance_avg = mean(abundance)) |>
  arrange(desc(abundance_avg)) |>
  # slice_head() avoids the `1:n` pitfall (1:0 would still select row 1).
  slice_head(n = n)
d# A tibble: 10 × 2
species abundance_avg
<chr> <dbl>
1 Sprattus 46.3
2 Pomatoschistus microps 42.1
3 Pomatoschistus minutus 36.5
4 Pomatoschistus 36.4
5 Alburnus alburnus 31.3
6 Sprattus sprattus 30.8
7 Osmerus eperlanus 24.4
8 Clupea harengus 22.0
9 Merlangius merlangus 17.1
10 Pomatoschistus marmoratus 17
Let’s pull these species and plot their time series.
Is it expected that “Sprattus” and “Sprattus sprattus” are two distinct species? This may be an error made when the species name was entered. If so, species names should be cleaned up.
# Names of the most abundant species found above.
common_sp <- d[["species"]]

# Mean abundance of those species per year, plus its natural logarithm.
d <- d.poisson |>
  filter(species %in% common_sp) |>
  group_by(species, year) |>
  summarise(abundance_avg = mean(abundance), .groups = "drop") |>
  mutate(log_abundance = log(abundance_avg))

# One coloured series per species.
ggplot(d) +
  aes(x = year, y = log_abundance, color = species) +
  geom_point() +
  geom_line() +
  labs(x = "Year", y = "Log abundance")
1.2. Plot traits
First coordinates need to be formatted. They are given as strings and should be converted to numeric values.
library(stringr)

# Coordinates are strings with a decimal comma: swap the first comma for a dot
# and convert each of the four coordinate columns to numeric.
d.poisson <- d.poisson |>
  mutate(
    across(
      c(start_x, start_y, end_x, end_y),
      \(v) as.numeric(str_replace(v, ",", "."))
    )
  )
summary(d.poisson[, c("start_x", "start_y", "end_x", "end_y")]) start_x start_y end_x end_y
Min. :-4.6224 Min. :44.56 Min. :-4.6234 Min. :44.56
1st Qu.:-3.3806 1st Qu.:47.30 1st Qu.:-3.3791 1st Qu.:47.30
Median :-1.5255 Median :47.85 Median :-1.5279 Median :47.85
Mean :-1.8722 Mean :47.96 Mean :-1.8720 Mean :47.96
3rd Qu.:-0.7029 3rd Qu.:49.19 3rd Qu.:-0.7024 3rd Qu.:49.18
Max. : 1.6564 Max. :50.26 Max. : 1.6525 Max. :50.26
# Draw every trait as a segment from its start to its end point, coloured by
# zone, with equal scaling on both axes.
ggplot(d.poisson) +
  geom_segment(
    aes(x = start_x, y = start_y, xend = end_x, yend = end_y, color = zone)
  ) +
  coord_equal() +
  labs(x = "Longitude", y = "Latitude")
Now let’s add a map in the background with leaflet. Because we have many traits, we will focus on a single zone. First, we need to convert traits to sf object.
library(sf)
library(leaflet)

# Zone of the first record; the map is restricted to it.
zone_plot <- first(d.poisson$zone)

# Build one two-point LINESTRING (start -> end, EPSG:4326) per row.
traits_sf <- d.poisson |>
  filter(zone == zone_plot) |>
  rowwise() |>
  mutate(
    geometry = st_sfc(st_linestring(
      matrix(c(start_x, end_x, start_y, end_y), ncol = 2, byrow = FALSE)
    ), crs = 4326)
  ) |>
  # Drop the row-wise grouping so it does not leak into the sf object.
  ungroup() |>
  st_as_sf()
Then, we can plot them, using year for the color gradient.
# Colour scale mapping the year of each trait onto the viridis palette.
pal <- colorNumeric(
  palette = "viridis",
  domain = traits_sf$year
)

# The traits are LINESTRING geometries, so they are drawn with addPolylines()
# rather than addPolygons(), which is meant for closed, filled shapes.
leaflet(traits_sf) |>
  addTiles() |>
  addPolylines(
    data = traits_sf,
    color = ~ pal(year),
    opacity = 0.7,
    popup = ~ paste("Year: ", year)
  ) |>
  addLegend("bottomright",
    pal = pal, values = ~year,
    title = "Year",
    labFormat = labelFormat(big.mark = ""),
    opacity = 1
  )
Most traits follow the river suggesting that we have processed the data correctly.
Now let’s focus on how fish sizes are measured.
1.3. Get fish size measurement rates
1.3.1. Prepare size data
First let’s read the file containing fish size information.
# Read the Pomet size table (semicolon-separated). As for the catch table, the
# accented characters come out as raw bytes with the default encoding, so the
# file is declared as Latin-1.
d.size <- read.csv(
  here(data_folder, "poisson_tailles_pomet.csv"),
  sep = ";",
  fileEncoding = "latin1"
)
head(d.size) ID_interne_passage ID_interne_prelevement
1 60733591 61839708
2 60728092 61836630
3 60728092 61836630
4 60728092 61836630
5 60728092 61836630
6 60728092 61836630
ID_Zone
1 FRGT20 - Le Blavet
2 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure
3 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure
4 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure
5 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure
6 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure
Zone Annee Mois Date Heure Trait
1 Le Blavet - ME 2012 5 11/05/2012 12:56:00 Trait n\xb0 3
2 La Dives - estuaire 2016 10 01/10/2016 12:13:00 Trait n\xb0 12
3 La Dives - estuaire 2016 10 01/10/2016 12:13:00 Trait n\xb0 12
4 La Dives - estuaire 2016 10 01/10/2016 12:13:00 Trait n\xb0 12
5 La Dives - estuaire 2016 10 01/10/2016 12:13:00 Trait n\xb0 12
6 La Dives - estuaire 2016 10 01/10/2016 12:13:00 Trait n\xb0 12
Coord_Deb_xmin Coord_Deb_ymin Coord_Fin_xmin Coord_Fin_ymin Prof
1 -3,28684 47,77047 -3,28449 47,77652 3,1
2 -0,1631 49,193517 -0,164167 49,18925 2,7
3 -0,1631 49,193517 -0,164167 49,18925 2,7
4 -0,1631 49,193517 -0,164167 49,18925 2,7
5 -0,1631 49,193517 -0,164167 49,18925 2,7
6 -0,1631 49,193517 -0,164167 49,18925 2,7
Engin_peche Pour_satO2 Salinite Temperature_eau NomScient
1 chalut a perche_1.46m 94 8,6 14 Abramis
2 chalut a perche_1.46m 112,5 0,33 15,29 Abramis brama
3 chalut a perche_1.46m 112,5 0,33 15,29 Abramis brama
4 chalut a perche_1.46m 112,5 0,33 15,29 Abramis brama
5 chalut a perche_1.46m 112,5 0,33 15,29 Abramis brama
6 chalut a perche_1.46m 112,5 0,33 15,29 Abramis brama
Ecologique Trophique Position Nind_esp Pds_esp Nind_esp_taille
1 FW IB D 1 1 1
2 FW I D 9 165 1
3 FW I D 9 165 1
4 FW I D 9 165 1
5 FW I D 9 165 1
6 FW I D 9 165 1
Longueur_fourche_mm Longueur_fourche_cm
1 52 5
2 63 6
3 55 6
4 70 7
5 65 6
6 193 19
Commentaire
1 Station : B4 b. Coef. de mar\xe9e : 66.0. Issu de la reprise POMET du 21/11/2019
2 Longueur r\xe9elle : 636 m Pr\xe9sence d'Anondonte. Station : 12. Coef. de mar\xe9e : 90.0. Issu de la reprise POMET du 21/11/2019
3 Longueur r\xe9elle : 636 m Pr\xe9sence d'Anondonte. Station : 12. Coef. de mar\xe9e : 90.0. Issu de la reprise POMET du 21/11/2019
4 Longueur r\xe9elle : 636 m Pr\xe9sence d'Anondonte. Station : 12. Coef. de mar\xe9e : 90.0. Issu de la reprise POMET du 21/11/2019
5 Longueur r\xe9elle : 636 m Pr\xe9sence d'Anondonte. Station : 12. Coef. de mar\xe9e : 90.0. Issu de la reprise POMET du 21/11/2019
6 Longueur r\xe9elle : 636 m Pr\xe9sence d'Anondonte. Station : 12. Coef. de mar\xe9e : 90.0. Issu de la reprise POMET du 21/11/2019
As before let’s do a bit of cleaning before anything else.
# Drop the comment and descriptive columns that are not used below.
d.size <- select(
  d.size,
  !c(Commentaire, Ecologique, Trophique, Engin_peche, Zone)
)
head(d.size) ID_interne_passage ID_interne_prelevement
1 60733591 61839708
2 60728092 61836630
3 60728092 61836630
4 60728092 61836630
5 60728092 61836630
6 60728092 61836630
ID_Zone Annee Mois
1 FRGT20 - Le Blavet 2012 5
2 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure 2016 10
3 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure 2016 10
4 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure 2016 10
5 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure 2016 10
6 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure 2016 10
Date Heure Trait Coord_Deb_xmin Coord_Deb_ymin
1 11/05/2012 12:56:00 Trait n\xb0 3 -3,28684 47,77047
2 01/10/2016 12:13:00 Trait n\xb0 12 -0,1631 49,193517
3 01/10/2016 12:13:00 Trait n\xb0 12 -0,1631 49,193517
4 01/10/2016 12:13:00 Trait n\xb0 12 -0,1631 49,193517
5 01/10/2016 12:13:00 Trait n\xb0 12 -0,1631 49,193517
6 01/10/2016 12:13:00 Trait n\xb0 12 -0,1631 49,193517
Coord_Fin_xmin Coord_Fin_ymin Prof Pour_satO2 Salinite Temperature_eau
1 -3,28449 47,77652 3,1 94 8,6 14
2 -0,164167 49,18925 2,7 112,5 0,33 15,29
3 -0,164167 49,18925 2,7 112,5 0,33 15,29
4 -0,164167 49,18925 2,7 112,5 0,33 15,29
5 -0,164167 49,18925 2,7 112,5 0,33 15,29
6 -0,164167 49,18925 2,7 112,5 0,33 15,29
NomScient Position Nind_esp Pds_esp Nind_esp_taille Longueur_fourche_mm
1 Abramis D 1 1 1 52
2 Abramis brama D 9 165 1 63
3 Abramis brama D 9 165 1 55
4 Abramis brama D 9 165 1 70
5 Abramis brama D 9 165 1 65
6 Abramis brama D 9 165 1 193
Longueur_fourche_cm
1 5
2 6
3 6
4 7
5 6
6 19
Then let’s rename columns.
# Give the Pomet size columns short English names (new_name = old_name).
d.size <- rename(
  d.size,
  zone = ID_Zone,
  year = Annee, month = Mois, date = Date, hour = Heure,
  trait = Trait,
  start_x = Coord_Deb_xmin, start_y = Coord_Deb_ymin,
  end_x = Coord_Fin_xmin, end_y = Coord_Fin_ymin,
  depth = Prof,
  oxygen = Pour_satO2, salinity = Salinite, temperature = Temperature_eau,
  species = NomScient,
  position = Position,
  abundance = Nind_esp,
  weight_batch = Pds_esp,
  batch_size = Nind_esp_taille,
  length_mm = Longueur_fourche_mm,
  length_cm = Longueur_fourche_cm
)
head(d.size) ID_interne_passage ID_interne_prelevement
1 60733591 61839708
2 60728092 61836630
3 60728092 61836630
4 60728092 61836630
5 60728092 61836630
6 60728092 61836630
zone year month
1 FRGT20 - Le Blavet 2012 5
2 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure 2016 10
3 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure 2016 10
4 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure 2016 10
5 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure 2016 10
6 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure 2016 10
date hour trait start_x start_y end_x end_y
1 11/05/2012 12:56:00 Trait n\xb0 3 -3,28684 47,77047 -3,28449 47,77652
2 01/10/2016 12:13:00 Trait n\xb0 12 -0,1631 49,193517 -0,164167 49,18925
3 01/10/2016 12:13:00 Trait n\xb0 12 -0,1631 49,193517 -0,164167 49,18925
4 01/10/2016 12:13:00 Trait n\xb0 12 -0,1631 49,193517 -0,164167 49,18925
5 01/10/2016 12:13:00 Trait n\xb0 12 -0,1631 49,193517 -0,164167 49,18925
6 01/10/2016 12:13:00 Trait n\xb0 12 -0,1631 49,193517 -0,164167 49,18925
depth oxygen salinity temperature species position abundance
1 3,1 94 8,6 14 Abramis D 1
2 2,7 112,5 0,33 15,29 Abramis brama D 9
3 2,7 112,5 0,33 15,29 Abramis brama D 9
4 2,7 112,5 0,33 15,29 Abramis brama D 9
5 2,7 112,5 0,33 15,29 Abramis brama D 9
6 2,7 112,5 0,33 15,29 Abramis brama D 9
weight_batch batch_size length_mm length_cm
1 1 1 52 5
2 165 1 63 6
3 165 1 55 6
4 165 1 70 7
5 165 1 65 6
6 165 1 193 19
And cleaning zone ID.
# Keep only the first six characters of `zone`, which hold the zone ID.
d.size <- d.size |>
  mutate(zone = substr(zone, 1, 6))
head(d.size) ID_interne_passage ID_interne_prelevement zone year month date
1 60733591 61839708 FRGT20 2012 5 11/05/2012
2 60728092 61836630 FRHT08 2016 10 01/10/2016
3 60728092 61836630 FRHT08 2016 10 01/10/2016
4 60728092 61836630 FRHT08 2016 10 01/10/2016
5 60728092 61836630 FRHT08 2016 10 01/10/2016
6 60728092 61836630 FRHT08 2016 10 01/10/2016
hour trait start_x start_y end_x end_y depth oxygen
1 12:56:00 Trait n\xb0 3 -3,28684 47,77047 -3,28449 47,77652 3,1 94
2 12:13:00 Trait n\xb0 12 -0,1631 49,193517 -0,164167 49,18925 2,7 112,5
3 12:13:00 Trait n\xb0 12 -0,1631 49,193517 -0,164167 49,18925 2,7 112,5
4 12:13:00 Trait n\xb0 12 -0,1631 49,193517 -0,164167 49,18925 2,7 112,5
5 12:13:00 Trait n\xb0 12 -0,1631 49,193517 -0,164167 49,18925 2,7 112,5
6 12:13:00 Trait n\xb0 12 -0,1631 49,193517 -0,164167 49,18925 2,7 112,5
salinity temperature species position abundance weight_batch batch_size
1 8,6 14 Abramis D 1 1 1
2 0,33 15,29 Abramis brama D 9 165 1
3 0,33 15,29 Abramis brama D 9 165 1
4 0,33 15,29 Abramis brama D 9 165 1
5 0,33 15,29 Abramis brama D 9 165 1
6 0,33 15,29 Abramis brama D 9 165 1
length_mm length_cm
1 52 5
2 63 6
3 55 6
4 70 7
5 65 6
6 193 19
Furthermore, we can notice that lengths in mm are stored as characters instead of numeric values.
class(d.size$length_mm)[1] "character"
# Lengths were read as text. Other numeric columns of this file use a decimal
# comma, so swap a comma for a dot before converting; otherwise any such value
# would become NA. NOTE(review): confirm why this column is read as character.
d.size$length_mm <- as.numeric(sub(",", ".", d.size$length_mm, fixed = TRUE))
We can check on a subset of our dataframe that length in mm is ten times the length in cm.
# Subset: the zone selected for the map, year 2014, rows with a known length.
d <- d.size |>
  filter(zone == zone_plot, !is.na(length_mm), year == 2014)

# Compare the two length columns.
ggplot(d) +
  geom_point(aes(x = length_mm, y = length_cm))
So let’s remove the length in cm, which is a duplicate and less precise than the length in mm.
# Drop the centimetre length column.
d.size <- select(d.size, !length_cm)
head(d.size) ID_interne_passage ID_interne_prelevement zone year month date
1 60733591 61839708 FRGT20 2012 5 11/05/2012
2 60728092 61836630 FRHT08 2016 10 01/10/2016
3 60728092 61836630 FRHT08 2016 10 01/10/2016
4 60728092 61836630 FRHT08 2016 10 01/10/2016
5 60728092 61836630 FRHT08 2016 10 01/10/2016
6 60728092 61836630 FRHT08 2016 10 01/10/2016
hour trait start_x start_y end_x end_y depth oxygen
1 12:56:00 Trait n\xb0 3 -3,28684 47,77047 -3,28449 47,77652 3,1 94
2 12:13:00 Trait n\xb0 12 -0,1631 49,193517 -0,164167 49,18925 2,7 112,5
3 12:13:00 Trait n\xb0 12 -0,1631 49,193517 -0,164167 49,18925 2,7 112,5
4 12:13:00 Trait n\xb0 12 -0,1631 49,193517 -0,164167 49,18925 2,7 112,5
5 12:13:00 Trait n\xb0 12 -0,1631 49,193517 -0,164167 49,18925 2,7 112,5
6 12:13:00 Trait n\xb0 12 -0,1631 49,193517 -0,164167 49,18925 2,7 112,5
salinity temperature species position abundance weight_batch batch_size
1 8,6 14 Abramis D 1 1 1
2 0,33 15,29 Abramis brama D 9 165 1
3 0,33 15,29 Abramis brama D 9 165 1
4 0,33 15,29 Abramis brama D 9 165 1
5 0,33 15,29 Abramis brama D 9 165 1
6 0,33 15,29 Abramis brama D 9 165 1
length_mm
1 52
2 63
3 55
4 70
5 65
6 193
1.3.2. Full size sampling
Now that the data is ready, we want to see at what frequency all individual fish sizes are measured.
We will begin with rough estimates, then refine these at the species and year levels.
# Total number of measured fish per sample and species (sum of batch sizes).
d <- summarise(
  group_by(d.size, ID_interne_prelevement, species),
  n_measured = sum(batch_size),
  .groups = "drop"
)

# Attach that count to every catch record; records without a match get NA.
d.join <- d.poisson |>
  left_join(d, by = c("ID_interne_prelevement", "species"))
head(d.join) ID_interne_passage ID_interne_prelevement zone year month date
1 60733591 61839708 FRGT20 2012 5 11/05/2012
2 60736834 61836379 FRHC14 2018 10 08/10/2018
3 60731145 61831377 FRHC10 2018 5 13/05/2018
4 60729058 61837714 FRGT31 2009 5 15/05/2009
5 60736468 61839934 FRGT28 2013 6 19/06/2013
6 60731828 61832476 FRGT28 2010 9 28/09/2010
hour trait start_x start_y end_x end_y depth
1 12:56:00 Trait n\xb0 3 -3.286840 47.77047 -3.284490 47.77652 3,1
2 10:53:00 Trait n\xb0 12 -0.317040 49.18490 -0.324330 49.18178 3,7
3 20:55:00 Trait n\xb0 13 -1.115690 49.33788 -1.117380 49.34686 4,6
4 10:13:00 Trait n\xb0 4 -1.041967 46.32115 -1.031567 46.31883 2,6
5 12:44:00 Trait n\xb0 5 -1.380000 47.29353 -1.366600 47.30060 3,8
6 15:25:00 Trait n\xb0 19 -1.512717 47.21700 -1.496050 47.22120 3,42
species position abundance weight_batch n_measured
1 Abramis D 1 1 1
2 Abramis brama D 49 252 30
3 Abramis brama D 1 <NA> 1
4 Abramis brama D 53 480 32
5 Abramis brama D 1 57 1
6 Abramis brama D 1 37 1
To check that the join worked, we verify that abundance = n_measured for a fishing event for which we know that all fish sizes have been measured.
d.join |> filter(ID_interne_prelevement == 61836630, species == "Abramis brama") ID_interne_passage ID_interne_prelevement zone year month date
1 60728092 61836630 FRHT08 2016 10 01/10/2016
hour trait start_x start_y end_x end_y depth
1 12:13:00 Trait n\xb0 12 -0.1631 49.19352 -0.164167 49.18925 2,7
species position abundance weight_batch n_measured
1 Abramis brama D 9 165 9
First, let’s look at rough estimates.
# Each row of d.join is one species in one fishing event.
n_tot <- nrow(d.join)
# Rows where the number of measured fish equals the catch abundance.
n_full <- d.join |>
  filter(abundance == n_measured) |>
  nrow()
n_full / n_tot[1] 0.8375625
When filtering for full size sampling, we keep about 84% of the rows of the original dataframe. This means that most of the time all individual sizes are measured. But let’s go into further detail and look at how full size sampling depends on year, zone and species.
# Fraction of the caught fish that were measured, for each species in each
# sample, sorted from the least to the most completely measured.
# NOTE(review): this is a per-event ratio, not the share of fully measured
# events computed per zone further down; confirm which one is intended.
d.sampling <- d.join |>
  filter(!is.na(n_measured)) |>
  group_by(species, ID_interne_prelevement) |>
  # Drop the grouping explicitly so the result is not left grouped by species.
  summarise(measurement_rate = n_measured / abundance, .groups = "drop") |>
  arrange(measurement_rate)
head(d.sampling)# A tibble: 6 × 3
# Groups: species [4]
species ID_interne_prelevement measurement_rate
<chr> <int> <dbl>
1 Pomatoschistus microps 61837579 0.00442
2 Sprattus sprattus 61835557 0.0048
3 Pomatoschistus microps 61835680 0.00717
4 Pomatoschistus 61835515 0.00810
5 Sprattus sprattus 61836807 0.0112
6 Trisopterus luscus 61838997 0.0116
Only a few species have a low full measurement rate. Even the species with the worst full size sampling rate has a rate of 0.5. One reason why some species are more often fully measured than others is their abundance. Measuring many individuals is cumbersome, so we expect the full measurement rate to decrease as average abundance increases.
# Mean abundance per species over the catch records with a known abundance.
d.abundance.avg <- summarise(
  group_by(filter(d.poisson, !is.na(abundance)), species),
  abundance_avg = mean(abundance)
)

# Attach it to the sampling table and plot the measurement rate against it.
d.sampling <- d.sampling |>
  left_join(d.abundance.avg, by = "species")
ggplot(d.sampling) +
  geom_point(aes(x = abundance_avg, y = measurement_rate)) +
  labs(x = "Average abundance", y = "Full measurement rate")
Let’s look if we have trends per zone.
# Per zone: share of records (with size data) where every caught fish was
# measured, sorted from the lowest to the highest share.
d.sampling <- arrange(
  summarise(
    group_by(filter(d.join, !is.na(n_measured)), zone),
    measurement_rate = mean(n_measured == abundance)
  ),
  measurement_rate
)
head(d.sampling)# A tibble: 6 × 2
zone measurement_rate
<chr> <dbl>
1 FRGT23 0.594
2 FRAC05 0.635
3 FRFT33 0.766
4 FRHT08 0.802
5 FRGT14 0.806
6 FRFC02 0.822
There is also high variability between zones, with a few zones where the measurement rate is low.
For now, we have seen what we wanted. Overall, fish sizes are most of the time all measured. This will help food web reconstruction.
2. Nurse campagne
Now let’s focus on another campagne: Nurse.
2.1. Clean the data
Let’s load the data.
# Nurse catches (semicolon-separated file).
d.nurse.catch <- read.csv(
  file = here(data_folder, "Captures_Nurse_1980_2023.csv"),
  sep = ";"
)
head(d.nurse.catch) Campagne Annee Trait Espece Nombre Poids
1 NURSE 1980 1980_1_1527_1 Anguilla anguilla 1 0.039
2 NURSE 1980 1980_1_1527_1 Crangon crangon 1 0.001
3 NURSE 1980 1980_1_1527_1 Platichthys flesus 36 5.501
4 NURSE 1980 1980_1_1527_1 Solea solea 16 0.375
5 NURSE 1980 1980_1_1527_1 Trisopterus luscus 1 0.068
6 NURSE 1980 1980_1_1528_2 Anguilla anguilla 1 0.162
# Nurse size measurements (semicolon-separated file).
d.nurse.size <- read.csv(
  file = here(data_folder, "Tailles_Nurse_1980_2023.csv"),
  sep = ";"
)
head(d.nurse.size) Campagne Annee Trait Espece Sexe Maturite Longueur Nombre
1 NURSE 1980 1980_1_1527_1 Anguilla anguilla N NA 27 1
2 NURSE 1980 1980_1_1527_1 Solea solea N NA 10 2
3 NURSE 1980 1980_1_1527_1 Solea solea N NA 11 3
4 NURSE 1980 1980_1_1527_1 Solea solea N NA 12 5
5 NURSE 1980 1980_1_1527_1 Solea solea N NA 13 2
6 NURSE 1980 1980_1_1527_1 Solea solea N NA 14 1
Poids Age
1 NA NA
2 NA NA
3 NA NA
4 NA NA
5 NA NA
6 NA NA
# Nurse trait descriptions (semicolon-separated file).
d.nurse.trait <- read.csv(
  file = here(data_folder, "Traits_Nurse_1980_2023.csv"),
  sep = ";"
)
head(d.nurse.trait) Campagne Annee Trait Mois Strate SurfaceBalayee Lat Long
1 NURSE 1980 1980_1_1527_1 10 Loire 0.004722 47.2823 -2.1500
2 NURSE 1980 1980_1_1528_2 10 Loire 0.004722 47.2822 -2.1333
3 NURSE 1980 1980_1_1529_3 10 Loire 0.003333 47.2657 -2.1860
4 NURSE 1980 1980_1_1530_4 10 Loire 0.004887 47.2333 -2.2083
5 NURSE 1980 1980_1_1531_5 10 Loire 0.004443 47.2533 -2.2450
6 NURSE 1980 1980_1_1532_6 10 Loire 0.005166 47.2887 -2.0982
ProfMoy
1 NA
2 NA
3 NA
4 NA
5 NA
6 NA
# Nurse strata table (semicolon-separated file).
d.nurse.strate <- read.csv(
  file = here(data_folder, "Strates_Nurse_1980_2023.csv"),
  sep = ";"
)
head(d.nurse.strate) Campagne Strate Surface
1 NURSE Bourgneuf 175.2515
2 NURSE Gironde 782.7810
3 NURSE Loire 142.7000
4 NURSE Pertuis Antioche 274.8723
5 NURSE Pertuis Breton 199.0315
6 NURSE Vilaine 329.5000
We see that we have four files. The first file contains information about the catches: what, when, where. The second file contains information about the individual sizes. The third file contains geographical information about the traits. I don’t know what the fourth file contains. Should ask Anik.
Let’s rename column for consistency with other campagnes.
# Drop the campaign label and switch to the shared English column names.
d.nurse.catch <- rename(
  select(d.nurse.catch, !Campagne),
  year = Annee, trait = Trait, species = Espece,
  abundance = Nombre, weight_batch = Poids
)
head(d.nurse.catch) year trait species abundance weight_batch
1 1980 1980_1_1527_1 Anguilla anguilla 1 0.039
2 1980 1980_1_1527_1 Crangon crangon 1 0.001
3 1980 1980_1_1527_1 Platichthys flesus 36 5.501
4 1980 1980_1_1527_1 Solea solea 16 0.375
5 1980 1980_1_1527_1 Trisopterus luscus 1 0.068
6 1980 1980_1_1528_2 Anguilla anguilla 1 0.162
# Drop the columns not used below and switch to the shared English names.
d.nurse.size <- rename(
  select(d.nurse.size, !c(Campagne, Poids, Age, Maturite)),
  year = Annee, trait = Trait, species = Espece,
  sex = Sexe, length = Longueur, batch_size = Nombre
)
head(d.nurse.size) year trait species sex length batch_size
1 1980 1980_1_1527_1 Anguilla anguilla N 27 1
2 1980 1980_1_1527_1 Solea solea N 10 2
3 1980 1980_1_1527_1 Solea solea N 11 3
4 1980 1980_1_1527_1 Solea solea N 12 5
5 1980 1980_1_1527_1 Solea solea N 13 2
6 1980 1980_1_1527_1 Solea solea N 14 1
# Drop the campaign label and the mean depth, then switch to the shared
# English column names.
d.nurse.trait <- rename(
  select(d.nurse.trait, !c(Campagne, ProfMoy)),
  year = Annee, trait = Trait, month = Mois,
  strate = Strate, surface = SurfaceBalayee,
  lat = Lat, long = Long
)
head(d.nurse.trait) year trait month strate surface lat long
1 1980 1980_1_1527_1 10 Loire 0.004722 47.2823 -2.1500
2 1980 1980_1_1528_2 10 Loire 0.004722 47.2822 -2.1333
3 1980 1980_1_1529_3 10 Loire 0.003333 47.2657 -2.1860
4 1980 1980_1_1530_4 10 Loire 0.004887 47.2333 -2.2083
5 1980 1980_1_1531_5 10 Loire 0.004443 47.2533 -2.2450
6 1980 1980_1_1532_6 10 Loire 0.005166 47.2887 -2.0982
2.2. Get general information about the campagne
2.2.1. Evolution of the sampling effort
# Number of distinct traits sampled each year.
trait_counts <- d.nurse.trait |>
  group_by(year) |>
  summarise(n_trait = n_distinct(trait))
ggplot(trait_counts, aes(x = year, y = n_trait)) +
  geom_line() +
  geom_point() +
  labs(x = "Year", y = "Number of traits")

# Total sampled surface per year (sum of `surface` over the traits).
surface_year <- d.nurse.trait |>
  group_by(year) |>
  summarise(surface_tot = sum(surface))
ggplot(surface_year, aes(x = year, y = surface_tot)) +
  geom_line() +
  geom_point() +
  labs(x = "Year", y = "Surface sampled")
We see high variability in the sampling effort across the year.
2.2.2. Plot geographical position of traits
# Turn the Nurse traits into point features (EPSG:4326), keeping the
# long/lat columns.
trait_sf <- d.nurse.trait |>
  st_as_sf(coords = c("long", "lat"), crs = 4326, remove = FALSE)

# One small marker per trait; the popup shows the trait and its year.
leaflet(trait_sf) |>
  addTiles() |>
  addCircleMarkers(
    radius = 2,
    fillOpacity = 0.7,
    stroke = FALSE,
    popup = ~ paste("Trait:", trait, "Year:", year)
  )
2.2.3. Fish size measurement rate
# Total number of measured fish per trait and species (sum of batch sizes).
d.nurse.batch <- summarise(
  group_by(d.nurse.size, trait, species),
  n_measured = sum(batch_size),
  .groups = "drop"
)
head(d.nurse.batch)# A tibble: 6 × 3
trait species n_measured
<chr> <chr> <int>
1 1980_1_1527_1 Anguilla anguilla 1
2 1980_1_1527_1 Solea solea 16
3 1980_1_1527_1 Trisopterus luscus 1
4 1980_1_1528_2 Anguilla anguilla 1
5 1980_1_1528_2 Solea solea 82
6 1980_1_1529_3 Anguilla anguilla 1
# Attach the catch record (year, abundance, weight) of the same species and
# trait to each measured batch.
d.nurse.batch <- d.nurse.batch |>
  left_join(d.nurse.catch, by = c("species", "trait"))
head(d.nurse.batch)# A tibble: 6 × 6
trait species n_measured year abundance weight_batch
<chr> <chr> <int> <int> <int> <dbl>
1 1980_1_1527_1 Anguilla anguilla 1 1980 1 0.039
2 1980_1_1527_1 Solea solea 16 1980 16 0.375
3 1980_1_1527_1 Trisopterus luscus 1 1980 1 0.068
4 1980_1_1528_2 Anguilla anguilla 1 1980 1 0.162
5 1980_1_1528_2 Solea solea 82 1980 82 1.70
6 1980_1_1529_3 Anguilla anguilla 1 1980 1 0.399
# Each row is one species in one trait for which sizes were recorded.
n_tot <- nrow(d.nurse.batch)

# Sanity check: rows where more fish were measured than were caught.
n_over <- nrow(d.nurse.batch |> filter(n_measured > abundance))
n_over
[1] 555

# Share of rows where every caught fish was measured, computed the same way as
# for the Pomet and Solper campagnes (n_measured == abundance).
n_full <- nrow(d.nurse.batch |> filter(n_measured == abundance))
n_full / n_tot
For the Nurse campagne, we also have a very high size measurement rate. Let’s investigate the last campagne: SOLPER.
3. Solper campagne
3.1. Clean the data
Let’s load the data.
# Solper catches (semicolon-separated file).
d.solper.catch <- read.csv(
  file = here(data_folder, "captures_SOLPER_2005_2011.csv"),
  sep = ";"
)
head(d.solper.catch) Campagne Annee Trait Espece Nombre Poids
1 SOLPER 2005 SPA01 CALMLYR 1 2.9
2 SOLPER 2005 SPA01 ENGRENC 526 575.5
3 SOLPER 2005 SPA01 GOBINIG 1 1.9
4 SOLPER 2005 SPA01 POMOMIN 10 13.4
5 SOLPER 2005 SPA01 SOLESOL 1 31.0
6 SOLPER 2005 SPA01 SPRASPR 7 19.1
# Solper size measurements (semicolon-separated file).
d.solper.size <- read.csv(
  file = here(data_folder, "tailles_SOLPER_2005_2011.csv"),
  sep = ";"
)
head(d.solper.size) Campagne Annee Trait Espece Sexe Maturite Longueur Nombre Poids Age
1 SOLPER 2005 SPA01 SOLESOL I NA 16 1 31 NA
2 SOLPER 2005 SPA02 SOLESOL I NA 6 1 2 NA
3 SOLPER 2005 SPA02 SOLESOL I NA 7 2 8 NA
4 SOLPER 2005 SPA02 SOLESOL I NA 8 6 26 NA
5 SOLPER 2005 SPA02 SOLESOL I NA 9 3 20 NA
6 SOLPER 2005 SPA02 SOLESOL I NA 10 1 10 NA
# Solper trait descriptions (semicolon-separated file).
d.solper.trait <- read.csv(
  file = here(data_folder, "traits_SOLPER_2005_2011.csv"),
  sep = ";"
)
head(d.solper.trait) Campagne Annee Trait Mois Strate SurfaceBalayee Lat Long ProfMoy
1 SOLPER 2005 SPA01 8 PA 0.001421 46.14030 -1.22043 6
2 SOLPER 2005 SPA02 8 PA 0.001654 46.14084 -1.18852 6
3 SOLPER 2005 SPA03 8 PA 0.001936 45.81676 -1.20102 6
4 SOLPER 2005 SPA04 8 PA 0.001684 45.79853 -1.19984 7
5 SOLPER 2005 SPA05 8 PA 0.002203 45.79681 -1.23134 8
6 SOLPER 2005 SPA06 9 PA 0.002522 46.13919 -1.25329 13
As for the Nurse campagne, we have one file for the catches, one for the individual sizes, and one for the trait positions.
As before, we will just rename the column for consistency.
# Drop the campaign label and switch to the shared English column names.
d.solper.catch <- rename(
  select(d.solper.catch, !Campagne),
  year = Annee, trait = Trait, species = Espece,
  abundance = Nombre, weight_batch = Poids
)
head(d.solper.catch) year trait species abundance weight_batch
1 2005 SPA01 CALMLYR 1 2.9
2 2005 SPA01 ENGRENC 526 575.5
3 2005 SPA01 GOBINIG 1 1.9
4 2005 SPA01 POMOMIN 10 13.4
5 2005 SPA01 SOLESOL 1 31.0
6 2005 SPA01 SPRASPR 7 19.1
# Drop the columns not used below and switch to the shared English names.
d.solper.size <- rename(
  select(d.solper.size, !c(Campagne, Age, Maturite)),
  year = Annee, trait = Trait, species = Espece,
  sex = Sexe, length = Longueur,
  batch_size = Nombre, weight = Poids
)
head(d.solper.size) year trait species sex length batch_size weight
1 2005 SPA01 SOLESOL I 16 1 31
2 2005 SPA02 SOLESOL I 6 1 2
3 2005 SPA02 SOLESOL I 7 2 8
4 2005 SPA02 SOLESOL I 8 6 26
5 2005 SPA02 SOLESOL I 9 3 20
6 2005 SPA02 SOLESOL I 10 1 10
# Drop the campaign label and switch to the shared English column names.
d.solper.trait <- rename(
  select(d.solper.trait, !Campagne),
  year = Annee, trait = Trait, month = Mois,
  strate = Strate, surface = SurfaceBalayee,
  lat = Lat, long = Long,
  depth = ProfMoy
)
head(d.solper.trait) year trait month strate surface lat long depth
1 2005 SPA01 8 PA 0.001421 46.14030 -1.22043 6
2 2005 SPA02 8 PA 0.001654 46.14084 -1.18852 6
3 2005 SPA03 8 PA 0.001936 45.81676 -1.20102 6
4 2005 SPA04 8 PA 0.001684 45.79853 -1.19984 7
5 2005 SPA05 8 PA 0.002203 45.79681 -1.23134 8
6 2005 SPA06 9 PA 0.002522 46.13919 -1.25329 13
Now that the data is cleaned let’s look at general information about the campagne.
3.2. Get general information about the campagne
3.2.1. Evolution of sampling effort
# Per year: number of distinct traits and total sampled surface.
trait_year <- summarise(
  group_by(d.solper.trait, year),
  n_trait = n_distinct(trait),
  surf_tot = sum(surface)
)

# Trait count over the years; points are coloured by the total surface.
ggplot(trait_year) +
  aes(x = year, y = n_trait, color = surf_tot) +
  geom_line(color = "grey") +
  geom_point() +
  labs(x = "Year", y = "Number of traits", color = "Total surface")
We see that the sampling effort has been relatively constant, although decreasing with years. Furthermore, as expected, the surface sampled is positively correlated with the number of traits.
3.2.2. Plot trait locations
# Turn the Solper traits into point features (EPSG:4326), keeping the
# long/lat columns.
trait_sf <- d.solper.trait |>
  st_as_sf(coords = c("long", "lat"), crs = 4326, remove = FALSE)

# One small marker per trait; the popup shows the trait and its year.
leaflet(trait_sf) |>
  addTiles() |>
  addCircleMarkers(
    radius = 2,
    fillOpacity = 0.7,
    stroke = FALSE,
    popup = ~ paste("Trait:", trait, "Year:", year)
  )
3.2.3. Full size sampling
# Number of measured fish per species, trait and year, joined to the matching
# catch record.
d.solper.batch <- d.solper.size |>
  group_by(species, trait, year) |>
  summarise(n_measured = sum(batch_size), .groups = "drop") |>
  left_join(d.solper.catch, by = c("species", "trait", "year"))

# Total rows, and rows where every caught fish was measured.
n_tot <- nrow(d.solper.batch)
n_full <- d.solper.batch |>
  filter(n_measured == abundance) |>
  nrow()
n_full / n_tot[1] 1
Wow, it seems that for the Solper campagne every single fish has been measured. It probably means that there has been some filtering about sampled species.