Explore coastal data

Summary

In this notebook we

  • explore the coastal data of fish catches from different campagnes (Nurse, Pomet and Solper)
  • extract general information about each campagne (sampling effort, trait location)
  • compute the how often each fish size is measured

0. Setup

Load dependencies.

library(renv)
renv::load()
renv::restore() # Install missing librairies.
renv::status() # Check the project state.
library(here)
library(dplyr)
library(ggplot2)
set_theme(theme_minimal())
options(
  ggplot2.continuous.colour = "viridis",
  ggplot2.continuous.fill = "viridis"
)
data_folder <- here("data", "sea", "raw")
list.files(data_folder)
 [1] "Captures_Nurse_1980_2023.csv"                         
 [2] "captures_SOLPER_2005_2011.csv"                        
 [3] "fish_diet.csv"                                        
 [4] "poisson_pomet.csv"                                    
 [5] "poisson_tailles_pomet.csv"                            
 [6] "POMET_RefTax_Sp.csv"                                  
 [7] "Reftax_Captures_NURSE_DCE_SUIVINOUR_SOLPER_2024.RData"
 [8] "Strates_Nurse_1980_2023.csv"                          
 [9] "strates_SOLPER_2005_2011.csv"                         
[10] "Tailles_Nurse_1980_2023.csv"                          
[11] "tailles_SOLPER_2005_2011.csv"                         
[12] "Traits_Nurse_1980_2023.csv"                           
[13] "traits_SOLPER_2005_2011.csv"                          

We see that we have multiple csv files. We have abundance and biomass data for the Atlantic (Nurse, Solper), Atlantic and Manche (POMET).

1. Pomet campagne

1.1. Clean the data

First let’s load files.

d.poisson <- read.csv(here(data_folder, "poisson_pomet.csv"), sep = ";")
head(d.poisson)
  ID_interne_passage ID_interne_prelevement                        ID_Zone
1           60733591               61839708             FRGT20 - Le Blavet
2           60736834               61836379          FRHC14 - Baie de Caen
3           60731145               61831377         FRHC10 - Baie des Veys
4           60729058               61837714 FRGT31 - La S\xe8vre Niortaise
5           60736468               61839934              FRGT28 - La Loire
6           60731828               61832476              FRGT28 - La Loire
                                                                      Zone
1                                                           Le Blavet - ME
2                                                  Estuaire de l'Orne - ME
3 Baie des Veys :fond de baie estuarien et chenaux d'Isigny et de Carentan
4                                                    La S\xe8vre Niortaise
5                                                                 La Loire
6                                                                 La Loire
  Annee Mois       Date    Heure          Trait Coord_Deb_xmin Coord_Deb_ymin
1  2012    5 11/05/2012 12:56:00  Trait n\xb0 3       -3,28684       47,77047
2  2018   10 08/10/2018 10:53:00 Trait n\xb0 12       -0,31704        49,1849
3  2018    5 13/05/2018 20:55:00 Trait n\xb0 13       -1,11569      49,337879
4  2009    5 15/05/2009 10:13:00  Trait n\xb0 4   -1,041966667       46,32115
5  2013    6 19/06/2013 12:44:00  Trait n\xb0 5          -1,38    47,29353333
6  2010    9 28/09/2010 15:25:00 Trait n\xb0 19   -1,512716667         47,217
  Coord_Fin_xmin Coord_Fin_ymin Prof           Engin_peche     NomScient
1       -3,28449       47,77652  3,1 chalut a perche_1.46m       Abramis
2       -0,32433       49,18178  3,7 chalut a perche_1.46m Abramis brama
3       -1,11738      49,346859  4,6 chalut a perche_1.46m Abramis brama
4   -1,031566667    46,31883333  2,6    chalut a perche_3m Abramis brama
5        -1,3666        47,3006  3,8    chalut a perche_3m Abramis brama
6       -1,49605        47,2212 3,42    chalut a perche_3m Abramis brama
  Ecologique Trophique Position Nind_esp Pds_esp
1         FW        IB        D        1       1
2         FW         I        D       49     252
3         FW         I        D        1    <NA>
4         FW         I        D       53     480
5         FW         I        D        1      57
6         FW         I        D        1      37
                                                                                Commentaire
1          Station : B4 b. Coef. de mar\xe9e : 66.0. Issu de la reprise POMET du 21/11/2019
2 Jusant. Station : Amont. Coef. de mar\xe9e : 98.0. Issu de la reprise POMET du 21/11/2019
3                          Coef. de mar\xe9e : 81.0. Issu de la reprise POMET du 21/11/2019
4           Station : 12B. Coef. de mar\xe9e : 50.0. Issu de la reprise POMET du 21/11/2019
5                          Coef. de mar\xe9e : 57.0. Issu de la reprise POMET du 21/11/2019
6                          Coef. de mar\xe9e : 72.0. Issu de la reprise POMET du 21/11/2019

Let’s do a bit of cleaning. Remove irrelevant columns.

d.poisson <- d.poisson |> select(-c(Commentaire, Ecologique, Trophique, Engin_peche, Zone))
names(d.poisson)
 [1] "ID_interne_passage"     "ID_interne_prelevement" "ID_Zone"               
 [4] "Annee"                  "Mois"                   "Date"                  
 [7] "Heure"                  "Trait"                  "Coord_Deb_xmin"        
[10] "Coord_Deb_ymin"         "Coord_Fin_xmin"         "Coord_Fin_ymin"        
[13] "Prof"                   "NomScient"              "Position"              
[16] "Nind_esp"               "Pds_esp"               

Rename columns for clarity.

d.poisson <- d.poisson |> rename(
  zone = ID_Zone,
  year = Annee,
  month = Mois,
  hour = Heure,
  date = Date,
  trait = Trait,
  start_x = Coord_Deb_xmin,
  start_y = Coord_Deb_ymin,
  end_x = Coord_Fin_xmin,
  end_y = Coord_Fin_ymin,
  depth = Prof,
  species = NomScient,
  position = Position,
  abundance = Nind_esp,
  weight_batch = Pds_esp
)
head(d.poisson)
  ID_interne_passage ID_interne_prelevement                           zone year
1           60733591               61839708             FRGT20 - Le Blavet 2012
2           60736834               61836379          FRHC14 - Baie de Caen 2018
3           60731145               61831377         FRHC10 - Baie des Veys 2018
4           60729058               61837714 FRGT31 - La S\xe8vre Niortaise 2009
5           60736468               61839934              FRGT28 - La Loire 2013
6           60731828               61832476              FRGT28 - La Loire 2010
  month       date     hour          trait      start_x     start_y
1     5 11/05/2012 12:56:00  Trait n\xb0 3     -3,28684    47,77047
2    10 08/10/2018 10:53:00 Trait n\xb0 12     -0,31704     49,1849
3     5 13/05/2018 20:55:00 Trait n\xb0 13     -1,11569   49,337879
4     5 15/05/2009 10:13:00  Trait n\xb0 4 -1,041966667    46,32115
5     6 19/06/2013 12:44:00  Trait n\xb0 5        -1,38 47,29353333
6     9 28/09/2010 15:25:00 Trait n\xb0 19 -1,512716667      47,217
         end_x       end_y depth       species position abundance weight_batch
1     -3,28449    47,77652   3,1       Abramis        D         1            1
2     -0,32433    49,18178   3,7 Abramis brama        D        49          252
3     -1,11738   49,346859   4,6 Abramis brama        D         1         <NA>
4 -1,031566667 46,31883333   2,6 Abramis brama        D        53          480
5      -1,3666     47,3006   3,8 Abramis brama        D         1           57
6     -1,49605     47,2212  3,42 Abramis brama        D         1           37

We see that the zone ID column also contains zone names which is not necessary.

unique(d.poisson$zone)
 [1] "FRGT20 - Le Blavet"                                         
 [2] "FRHC14 - Baie de Caen"                                      
 [3] "FRHC10 - Baie des Veys"                                     
 [4] "FRGT31 - La S\xe8vre Niortaise"                             
 [5] "FRGT28 - La Loire"                                          
 [6] "FRHT01 - Estuaire de Seine amont Poses  dulcaquicole"       
 [7] "FRGC01 - Baie du Mont-Saint-Michel"                         
 [8] "FRHT08 - la Dives du barrage de Saint Samson a l'embouchure"
 [9] "FRFT33 - Estuaire Fluvial Garonne Amont"                    
[10] "FRHT02 - Estuaire de Seine Moyen  dulcaquicole"             
[11] "FRHT07 - Risle Martime"                                     
[12] "FRGT27 - La Vilaine"                                        
[13] "FRFC02 - Pertuis Charentais"                                
[14] "FRFT31 - Estuaire Fluvial Isle"                             
[15] "FRHC16 - Le Havre   Antifer"                                
[16] "FRGT05 - Le L\xe9guer"                                      
[17] "FRGT04 - Le Jaudy"                                          
[18] "FRGC11 - Baie de Morlaix"                                   
[19] "FRFT32 - Estuaire Fluvial Dordogne"                         
[20] "FRGT03 - Le Trieux"                                         
[21] "FRFT09 - Estuaire Gironde aval"                             
[22] "FRGT12 - L'Aulne"                                           
[23] "FRGT18 - La La\xefta"                                       
[24] "FRFT35 - Gironde amont"                                     
[25] "FRGT21 - Rivi\xe8re  d'Etel"                                
[26] "FRGT17 - La Belon"                                          
[27] "FRGT13 - Le Goyen"                                          
[28] "FRAC05 - La Warenne \xe0 Ault"                              
[29] "FRGT16 - L'Aven"                                            
[30] "FRGT09 - L'Aber Benoit"                                     
[31] "FRGT15 - L'Odet"                                            
[32] "FRGT30 - Le Lay"                                            
[33] "FRGT10 - L'Elorn"                                           
[34] "FRGT19 - Le Scorff"                                         
[35] "FRGT02 - Bassin maritime de la Rance"                       
[36] "FRGT23 - Rivi\xe8re d'Auray"                                
[37] "FRGT14 - Rivi\xe8re de Pont-l'Abb\xe9"                      
[38] "FRGT08 - L'Aber Wrac'h"                                     

We want to keep only the ID.

d.poisson$zone <- substr(d.poisson$zone, 1, 6)
unique(d.poisson$zone)
 [1] "FRGT20" "FRHC14" "FRHC10" "FRGT31" "FRGT28" "FRHT01" "FRGC01" "FRHT08"
 [9] "FRFT33" "FRHT02" "FRHT07" "FRGT27" "FRFC02" "FRFT31" "FRHC16" "FRGT05"
[17] "FRGT04" "FRGC11" "FRFT32" "FRGT03" "FRFT09" "FRGT12" "FRGT18" "FRFT35"
[25] "FRGT21" "FRGT17" "FRGT13" "FRAC05" "FRGT16" "FRGT09" "FRGT15" "FRGT30"
[33] "FRGT10" "FRGT19" "FRGT02" "FRGT23" "FRGT14" "FRGT08"

Now let’s look at which species are more abundant on average.

n <- 10
d <- d.poisson |>
  group_by(species) |>
  summarise(abundance_avg = mean(abundance)) |>
  arrange(desc(abundance_avg)) |>
  slice(1:n)
d
# A tibble: 10 × 2
   species                   abundance_avg
   <chr>                             <dbl>
 1 Sprattus                           46.3
 2 Pomatoschistus microps             42.1
 3 Pomatoschistus minutus             36.5
 4 Pomatoschistus                     36.4
 5 Alburnus alburnus                  31.3
 6 Sprattus sprattus                  30.8
 7 Osmerus eperlanus                  24.4
 8 Clupea harengus                    22.0
 9 Merlangius merlangus               17.1
10 Pomatoschistus marmoratus          17  

Let’s pull these species and plot their time series.

Note

Is it expected that “Spratus” and “Spratus spratus” are two distinct species? This may be an error when species name was written. If so species names should be cleaned up.

common_sp <- d |> pull(species)

d <- d.poisson |>
  filter(species %in% common_sp) |>
  group_by(species, year) |>
  summarise(abundance_avg = mean(abundance), .groups = "drop")
d$log_abundance <- log(d$abundance_avg)

ggplot(d, aes(x = year, y = log_abundance, color = species)) +
  geom_point() +
  geom_line() +
  labs(x = "Year", y = "Log abundance")

1.2. Plot traits

First coordinates need to be formatted. They are given as strings and should be converted to numeric values.

library(stringr)
d.poisson <- d.poisson |>
  mutate(
    start_x = start_x |> str_replace(",", ".") |> as.numeric(),
    start_y = start_y |> str_replace(",", ".") |> as.numeric(),
    end_x = end_x |> str_replace(",", ".") |> as.numeric(),
    end_y = end_y |> str_replace(",", ".") |> as.numeric(),
  )

summary(d.poisson[, c("start_x", "start_y", "end_x", "end_y")])
    start_x           start_y          end_x             end_y      
 Min.   :-4.6224   Min.   :44.56   Min.   :-4.6234   Min.   :44.56  
 1st Qu.:-3.3806   1st Qu.:47.30   1st Qu.:-3.3791   1st Qu.:47.30  
 Median :-1.5255   Median :47.85   Median :-1.5279   Median :47.85  
 Mean   :-1.8722   Mean   :47.96   Mean   :-1.8720   Mean   :47.96  
 3rd Qu.:-0.7029   3rd Qu.:49.19   3rd Qu.:-0.7024   3rd Qu.:49.18  
 Max.   : 1.6564   Max.   :50.26   Max.   : 1.6525   Max.   :50.26  
ggplot(d.poisson, aes(x = start_x, y = start_y)) +
  geom_segment(aes(xend = end_x, yend = end_y, color = zone)) +
  coord_equal() +
  labs(x = "Longitude", y = "Latitude")

Now let’s add a map in the background with leaflet. Because we have many traits, we will focus on a single zone. First, we need to convert traits to sf object.

library(sf)
library(leaflet)

zone_plot <- first(d.poisson$zone)

traits_sf <- d.poisson |>
  filter(zone == zone_plot) |>
  rowwise() |>
  mutate(
    geometry = st_sfc(st_linestring(
      matrix(c(start_x, end_x, start_y, end_y), ncol = 2, byrow = FALSE)
    ), crs = 4326)
  ) |>
  st_as_sf()

Then, we can plot them, using year for the color gradient.

pal <- colorNumeric(
  palette = "viridis",
  domain = traits_sf$year
)

leaflet(traits_sf) |>
  addTiles() |>
  addPolygons(
    data = traits_sf,
    color = ~ pal(year),
    opacity = 0.7,
    popup = ~ paste("Year: ", year)
  ) |>
  addLegend("bottomright",
    pal = pal, values = ~year,
    title = "Year",
    labFormat = labelFormat(big.mark = ""),
    opacity = 1
  )

Most traits follow the river suggesting that we have processed the data correctly.

Now let’s focus on how fish sizes is measured.

1.3. Get fish size measurement rates

1.3.1. Prepare size data

First let’s read the file containing fish size information.

d.size <- read.csv(here(data_folder, "poisson_tailles_pomet.csv"), sep = ";")
head(d.size)
  ID_interne_passage ID_interne_prelevement
1           60733591               61839708
2           60728092               61836630
3           60728092               61836630
4           60728092               61836630
5           60728092               61836630
6           60728092               61836630
                                                      ID_Zone
1                                          FRGT20 - Le Blavet
2 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure
3 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure
4 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure
5 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure
6 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure
                 Zone Annee Mois       Date    Heure          Trait
1      Le Blavet - ME  2012    5 11/05/2012 12:56:00  Trait n\xb0 3
2 La Dives - estuaire  2016   10 01/10/2016 12:13:00 Trait n\xb0 12
3 La Dives - estuaire  2016   10 01/10/2016 12:13:00 Trait n\xb0 12
4 La Dives - estuaire  2016   10 01/10/2016 12:13:00 Trait n\xb0 12
5 La Dives - estuaire  2016   10 01/10/2016 12:13:00 Trait n\xb0 12
6 La Dives - estuaire  2016   10 01/10/2016 12:13:00 Trait n\xb0 12
  Coord_Deb_xmin Coord_Deb_ymin Coord_Fin_xmin Coord_Fin_ymin Prof
1       -3,28684       47,77047       -3,28449       47,77652  3,1
2        -0,1631      49,193517      -0,164167       49,18925  2,7
3        -0,1631      49,193517      -0,164167       49,18925  2,7
4        -0,1631      49,193517      -0,164167       49,18925  2,7
5        -0,1631      49,193517      -0,164167       49,18925  2,7
6        -0,1631      49,193517      -0,164167       49,18925  2,7
            Engin_peche Pour_satO2 Salinite Temperature_eau     NomScient
1 chalut a perche_1.46m         94      8,6              14       Abramis
2 chalut a perche_1.46m      112,5     0,33           15,29 Abramis brama
3 chalut a perche_1.46m      112,5     0,33           15,29 Abramis brama
4 chalut a perche_1.46m      112,5     0,33           15,29 Abramis brama
5 chalut a perche_1.46m      112,5     0,33           15,29 Abramis brama
6 chalut a perche_1.46m      112,5     0,33           15,29 Abramis brama
  Ecologique Trophique Position Nind_esp Pds_esp Nind_esp_taille
1         FW        IB        D        1       1               1
2         FW         I        D        9     165               1
3         FW         I        D        9     165               1
4         FW         I        D        9     165               1
5         FW         I        D        9     165               1
6         FW         I        D        9     165               1
  Longueur_fourche_mm Longueur_fourche_cm
1                  52                   5
2                  63                   6
3                  55                   6
4                  70                   7
5                  65                   6
6                 193                  19
                                                                                                                         Commentaire
1                                                   Station : B4 b. Coef. de mar\xe9e : 66.0. Issu de la reprise POMET du 21/11/2019
2 Longueur r\xe9elle : 636 m Pr\xe9sence d'Anondonte. Station : 12. Coef. de mar\xe9e : 90.0. Issu de la reprise POMET du 21/11/2019
3 Longueur r\xe9elle : 636 m Pr\xe9sence d'Anondonte. Station : 12. Coef. de mar\xe9e : 90.0. Issu de la reprise POMET du 21/11/2019
4 Longueur r\xe9elle : 636 m Pr\xe9sence d'Anondonte. Station : 12. Coef. de mar\xe9e : 90.0. Issu de la reprise POMET du 21/11/2019
5 Longueur r\xe9elle : 636 m Pr\xe9sence d'Anondonte. Station : 12. Coef. de mar\xe9e : 90.0. Issu de la reprise POMET du 21/11/2019
6 Longueur r\xe9elle : 636 m Pr\xe9sence d'Anondonte. Station : 12. Coef. de mar\xe9e : 90.0. Issu de la reprise POMET du 21/11/2019

As before let’s do a bit of cleaning before anything else.

d.size <- d.size |> select(-c(
  Commentaire,
  Ecologique,
  Trophique,
  Engin_peche,
  Zone
))

head(d.size)
  ID_interne_passage ID_interne_prelevement
1           60733591               61839708
2           60728092               61836630
3           60728092               61836630
4           60728092               61836630
5           60728092               61836630
6           60728092               61836630
                                                      ID_Zone Annee Mois
1                                          FRGT20 - Le Blavet  2012    5
2 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure  2016   10
3 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure  2016   10
4 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure  2016   10
5 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure  2016   10
6 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure  2016   10
        Date    Heure          Trait Coord_Deb_xmin Coord_Deb_ymin
1 11/05/2012 12:56:00  Trait n\xb0 3       -3,28684       47,77047
2 01/10/2016 12:13:00 Trait n\xb0 12        -0,1631      49,193517
3 01/10/2016 12:13:00 Trait n\xb0 12        -0,1631      49,193517
4 01/10/2016 12:13:00 Trait n\xb0 12        -0,1631      49,193517
5 01/10/2016 12:13:00 Trait n\xb0 12        -0,1631      49,193517
6 01/10/2016 12:13:00 Trait n\xb0 12        -0,1631      49,193517
  Coord_Fin_xmin Coord_Fin_ymin Prof Pour_satO2 Salinite Temperature_eau
1       -3,28449       47,77652  3,1         94      8,6              14
2      -0,164167       49,18925  2,7      112,5     0,33           15,29
3      -0,164167       49,18925  2,7      112,5     0,33           15,29
4      -0,164167       49,18925  2,7      112,5     0,33           15,29
5      -0,164167       49,18925  2,7      112,5     0,33           15,29
6      -0,164167       49,18925  2,7      112,5     0,33           15,29
      NomScient Position Nind_esp Pds_esp Nind_esp_taille Longueur_fourche_mm
1       Abramis        D        1       1               1                  52
2 Abramis brama        D        9     165               1                  63
3 Abramis brama        D        9     165               1                  55
4 Abramis brama        D        9     165               1                  70
5 Abramis brama        D        9     165               1                  65
6 Abramis brama        D        9     165               1                 193
  Longueur_fourche_cm
1                   5
2                   6
3                   6
4                   7
5                   6
6                  19

Then let’s rename columns.

d.size <- d.size |> rename(
  zone = ID_Zone,
  year = Annee,
  month = Mois,
  hour = Heure,
  date = Date,
  trait = Trait,
  start_x = Coord_Deb_xmin,
  start_y = Coord_Deb_ymin,
  end_x = Coord_Fin_xmin,
  end_y = Coord_Fin_ymin,
  depth = Prof,
  species = NomScient,
  position = Position,
  abundance = Nind_esp,
  weight_batch = Pds_esp,
  batch_size = Nind_esp_taille,
  oxygen = Pour_satO2,
  salinity = Salinite,
  length_cm = Longueur_fourche_cm,
  length_mm = Longueur_fourche_mm,
  temperature = Temperature_eau
)
head(d.size)
  ID_interne_passage ID_interne_prelevement
1           60733591               61839708
2           60728092               61836630
3           60728092               61836630
4           60728092               61836630
5           60728092               61836630
6           60728092               61836630
                                                         zone year month
1                                          FRGT20 - Le Blavet 2012     5
2 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure 2016    10
3 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure 2016    10
4 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure 2016    10
5 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure 2016    10
6 FRHT08 - la Dives du barrage de Saint Samson a l'embouchure 2016    10
        date     hour          trait  start_x   start_y     end_x    end_y
1 11/05/2012 12:56:00  Trait n\xb0 3 -3,28684  47,77047  -3,28449 47,77652
2 01/10/2016 12:13:00 Trait n\xb0 12  -0,1631 49,193517 -0,164167 49,18925
3 01/10/2016 12:13:00 Trait n\xb0 12  -0,1631 49,193517 -0,164167 49,18925
4 01/10/2016 12:13:00 Trait n\xb0 12  -0,1631 49,193517 -0,164167 49,18925
5 01/10/2016 12:13:00 Trait n\xb0 12  -0,1631 49,193517 -0,164167 49,18925
6 01/10/2016 12:13:00 Trait n\xb0 12  -0,1631 49,193517 -0,164167 49,18925
  depth oxygen salinity temperature       species position abundance
1   3,1     94      8,6          14       Abramis        D         1
2   2,7  112,5     0,33       15,29 Abramis brama        D         9
3   2,7  112,5     0,33       15,29 Abramis brama        D         9
4   2,7  112,5     0,33       15,29 Abramis brama        D         9
5   2,7  112,5     0,33       15,29 Abramis brama        D         9
6   2,7  112,5     0,33       15,29 Abramis brama        D         9
  weight_batch batch_size length_mm length_cm
1            1          1        52         5
2          165          1        63         6
3          165          1        55         6
4          165          1        70         7
5          165          1        65         6
6          165          1       193        19

And cleaning zone ID.

d.size$zone <- substr(d.size$zone, 1, 6)
head(d.size)
  ID_interne_passage ID_interne_prelevement   zone year month       date
1           60733591               61839708 FRGT20 2012     5 11/05/2012
2           60728092               61836630 FRHT08 2016    10 01/10/2016
3           60728092               61836630 FRHT08 2016    10 01/10/2016
4           60728092               61836630 FRHT08 2016    10 01/10/2016
5           60728092               61836630 FRHT08 2016    10 01/10/2016
6           60728092               61836630 FRHT08 2016    10 01/10/2016
      hour          trait  start_x   start_y     end_x    end_y depth oxygen
1 12:56:00  Trait n\xb0 3 -3,28684  47,77047  -3,28449 47,77652   3,1     94
2 12:13:00 Trait n\xb0 12  -0,1631 49,193517 -0,164167 49,18925   2,7  112,5
3 12:13:00 Trait n\xb0 12  -0,1631 49,193517 -0,164167 49,18925   2,7  112,5
4 12:13:00 Trait n\xb0 12  -0,1631 49,193517 -0,164167 49,18925   2,7  112,5
5 12:13:00 Trait n\xb0 12  -0,1631 49,193517 -0,164167 49,18925   2,7  112,5
6 12:13:00 Trait n\xb0 12  -0,1631 49,193517 -0,164167 49,18925   2,7  112,5
  salinity temperature       species position abundance weight_batch batch_size
1      8,6          14       Abramis        D         1            1          1
2     0,33       15,29 Abramis brama        D         9          165          1
3     0,33       15,29 Abramis brama        D         9          165          1
4     0,33       15,29 Abramis brama        D         9          165          1
5     0,33       15,29 Abramis brama        D         9          165          1
6     0,33       15,29 Abramis brama        D         9          165          1
  length_mm length_cm
1        52         5
2        63         6
3        55         6
4        70         7
5        65         6
6       193        19

Furthermore, we can notice that length in mm are specified as caracters instead of numerics.

class(d.size$length_mm)
[1] "character"
d.size$length_mm <- as.numeric(d.size$length_mm)

We can check on a subset of our dataframe that length in mm is ten times the length in cm.

d <- d.size |>
  filter(zone == zone_plot) |>
  filter(!is.na(length_mm), year == 2014)

ggplot(d, aes(x = length_mm, y = length_cm)) +
  geom_point()

So let’s remove the length in cm, which is a duplicate and less precise than the length in mm.

d.size <- d.size |> select(-length_cm)
head(d.size)
  ID_interne_passage ID_interne_prelevement   zone year month       date
1           60733591               61839708 FRGT20 2012     5 11/05/2012
2           60728092               61836630 FRHT08 2016    10 01/10/2016
3           60728092               61836630 FRHT08 2016    10 01/10/2016
4           60728092               61836630 FRHT08 2016    10 01/10/2016
5           60728092               61836630 FRHT08 2016    10 01/10/2016
6           60728092               61836630 FRHT08 2016    10 01/10/2016
      hour          trait  start_x   start_y     end_x    end_y depth oxygen
1 12:56:00  Trait n\xb0 3 -3,28684  47,77047  -3,28449 47,77652   3,1     94
2 12:13:00 Trait n\xb0 12  -0,1631 49,193517 -0,164167 49,18925   2,7  112,5
3 12:13:00 Trait n\xb0 12  -0,1631 49,193517 -0,164167 49,18925   2,7  112,5
4 12:13:00 Trait n\xb0 12  -0,1631 49,193517 -0,164167 49,18925   2,7  112,5
5 12:13:00 Trait n\xb0 12  -0,1631 49,193517 -0,164167 49,18925   2,7  112,5
6 12:13:00 Trait n\xb0 12  -0,1631 49,193517 -0,164167 49,18925   2,7  112,5
  salinity temperature       species position abundance weight_batch batch_size
1      8,6          14       Abramis        D         1            1          1
2     0,33       15,29 Abramis brama        D         9          165          1
3     0,33       15,29 Abramis brama        D         9          165          1
4     0,33       15,29 Abramis brama        D         9          165          1
5     0,33       15,29 Abramis brama        D         9          165          1
6     0,33       15,29 Abramis brama        D         9          165          1
  length_mm
1        52
2        63
3        55
4        70
5        65
6       193

1.3.2. Full size sampling

Now that the data is ready, we want to see at why frequency all individual fish sizes are measured.

We will begin with rough estimates, then refine these at the species and year levels.

d <- d.size |>
  group_by(ID_interne_prelevement, species) |>
  summarise(n_measured = sum(batch_size), .groups = "drop")

d.join <- left_join(d.poisson, d, by = c("ID_interne_prelevement", "species"))
head(d.join)
  ID_interne_passage ID_interne_prelevement   zone year month       date
1           60733591               61839708 FRGT20 2012     5 11/05/2012
2           60736834               61836379 FRHC14 2018    10 08/10/2018
3           60731145               61831377 FRHC10 2018     5 13/05/2018
4           60729058               61837714 FRGT31 2009     5 15/05/2009
5           60736468               61839934 FRGT28 2013     6 19/06/2013
6           60731828               61832476 FRGT28 2010     9 28/09/2010
      hour          trait   start_x  start_y     end_x    end_y depth
1 12:56:00  Trait n\xb0 3 -3.286840 47.77047 -3.284490 47.77652   3,1
2 10:53:00 Trait n\xb0 12 -0.317040 49.18490 -0.324330 49.18178   3,7
3 20:55:00 Trait n\xb0 13 -1.115690 49.33788 -1.117380 49.34686   4,6
4 10:13:00  Trait n\xb0 4 -1.041967 46.32115 -1.031567 46.31883   2,6
5 12:44:00  Trait n\xb0 5 -1.380000 47.29353 -1.366600 47.30060   3,8
6 15:25:00 Trait n\xb0 19 -1.512717 47.21700 -1.496050 47.22120  3,42
        species position abundance weight_batch n_measured
1       Abramis        D         1            1          1
2 Abramis brama        D        49          252         30
3 Abramis brama        D         1         <NA>          1
4 Abramis brama        D        53          480         32
5 Abramis brama        D         1           57          1
6 Abramis brama        D         1           37          1

To check that we did well, we verify that for a fishing event for which we know that all fish sizes have been measured that abundance = n_measured.

d.join |> filter(ID_interne_prelevement == 61836630, species == "Abramis brama")
  ID_interne_passage ID_interne_prelevement   zone year month       date
1           60728092               61836630 FRHT08 2016    10 01/10/2016
      hour          trait start_x  start_y     end_x    end_y depth
1 12:13:00 Trait n\xb0 12 -0.1631 49.19352 -0.164167 49.18925   2,7
        species position abundance weight_batch n_measured
1 Abramis brama        D         9          165          9

First, let’s look at rough estimates.

n_tot <- nrow(d.join) # 1 row: 1 species & 1 fishing event.
n_full <- nrow(d.join |> filter(abundance == n_measured))
n_full / n_tot
[1] 0.8375625

When filtering for full size sampling, we keep 83% of the rows of the original dataframe. This means that most of the time all individual sizes are measured. But, let’s go in further details and how full size sampling depends on year, zone and species.

d.sampling <- d.join |>
  filter(!is.na(n_measured)) |>
  group_by(species, ID_interne_prelevement) |>
  summarise(measurement_rate = n_measured / abundance) |>
  arrange(measurement_rate)
head(d.sampling)
# A tibble: 6 × 3
# Groups:   species [4]
  species                ID_interne_prelevement measurement_rate
  <chr>                                   <int>            <dbl>
1 Pomatoschistus microps               61837579          0.00442
2 Sprattus sprattus                    61835557          0.0048 
3 Pomatoschistus microps               61835680          0.00717
4 Pomatoschistus                       61835515          0.00810
5 Sprattus sprattus                    61836807          0.0112 
6 Trisopterus luscus                   61838997          0.0116 

We have only few species with low full measurement rate. Even the species with worst full size sampling rate hase a rate of 0.5. One reason that can explain why some species are more often fully measured than other is because of their abundance. Measuring many individuals is cumbersome, so we expect a decreasing trend between full measurement rate and average abundance.

d.abundance.avg <- d.poisson |>
  filter(!is.na(abundance)) |>
  group_by(species) |>
  summarise(abundance_avg = mean(abundance))

d.sampling <- left_join(d.sampling, d.abundance.avg, by = "species")

ggplot(d.sampling, aes(x = abundance_avg, y = measurement_rate)) +
  geom_point() +
  labs(x = "Average abundance", y = "Full measurement rate")

Let’s look if we have trends per zone.

d.sampling <- d.join |>
  filter(!is.na(n_measured)) |>
  group_by(zone) |>
  summarise(measurement_rate = mean(n_measured == abundance)) |>
  arrange(measurement_rate)
head(d.sampling)
# A tibble: 6 × 2
  zone   measurement_rate
  <chr>             <dbl>
1 FRGT23            0.594
2 FRAC05            0.635
3 FRFT33            0.766
4 FRHT08            0.802
5 FRGT14            0.806
6 FRFC02            0.822

There is also high variability between zone, with few zones where the measurement rate is low.

For now, we have seen what we wanted. Overall, fish sizes are most of the time all measured. This will help food web reconstruction.

2. Nurse campagne

Now let’s focus on another campagne: Nurse.

2.1. Clean the data

Let’s load the data.

d.nurse.catch <- read.csv(here(data_folder, "Captures_Nurse_1980_2023.csv"), sep = ";")
head(d.nurse.catch)
  Campagne Annee         Trait             Espece Nombre Poids
1    NURSE  1980 1980_1_1527_1  Anguilla anguilla      1 0.039
2    NURSE  1980 1980_1_1527_1    Crangon crangon      1 0.001
3    NURSE  1980 1980_1_1527_1 Platichthys flesus     36 5.501
4    NURSE  1980 1980_1_1527_1        Solea solea     16 0.375
5    NURSE  1980 1980_1_1527_1 Trisopterus luscus      1 0.068
6    NURSE  1980 1980_1_1528_2  Anguilla anguilla      1 0.162
d.nurse.size <- read.csv(here(data_folder, "Tailles_Nurse_1980_2023.csv"), sep = ";")
head(d.nurse.size)
  Campagne Annee         Trait            Espece Sexe Maturite Longueur Nombre
1    NURSE  1980 1980_1_1527_1 Anguilla anguilla    N       NA       27      1
2    NURSE  1980 1980_1_1527_1       Solea solea    N       NA       10      2
3    NURSE  1980 1980_1_1527_1       Solea solea    N       NA       11      3
4    NURSE  1980 1980_1_1527_1       Solea solea    N       NA       12      5
5    NURSE  1980 1980_1_1527_1       Solea solea    N       NA       13      2
6    NURSE  1980 1980_1_1527_1       Solea solea    N       NA       14      1
  Poids Age
1    NA  NA
2    NA  NA
3    NA  NA
4    NA  NA
5    NA  NA
6    NA  NA
d.nurse.trait <- read.csv(here(data_folder, "Traits_Nurse_1980_2023.csv"), sep = ";")
head(d.nurse.trait)
  Campagne Annee         Trait Mois Strate SurfaceBalayee     Lat    Long
1    NURSE  1980 1980_1_1527_1   10  Loire       0.004722 47.2823 -2.1500
2    NURSE  1980 1980_1_1528_2   10  Loire       0.004722 47.2822 -2.1333
3    NURSE  1980 1980_1_1529_3   10  Loire       0.003333 47.2657 -2.1860
4    NURSE  1980 1980_1_1530_4   10  Loire       0.004887 47.2333 -2.2083
5    NURSE  1980 1980_1_1531_5   10  Loire       0.004443 47.2533 -2.2450
6    NURSE  1980 1980_1_1532_6   10  Loire       0.005166 47.2887 -2.0982
  ProfMoy
1      NA
2      NA
3      NA
4      NA
5      NA
6      NA
d.nurse.strate <- read.csv(here(data_folder, "Strates_Nurse_1980_2023.csv"), sep = ";")
head(d.nurse.strate)
  Campagne           Strate  Surface
1    NURSE        Bourgneuf 175.2515
2    NURSE          Gironde 782.7810
3    NURSE            Loire 142.7000
4    NURSE Pertuis Antioche 274.8723
5    NURSE   Pertuis Breton 199.0315
6    NURSE          Vilaine 329.5000

We see that we have four files. The first file contains information about the catches: what, when, where. The second file contains information about the individual sizes. The third file contains geographical information about the trait. I don’t kow what contains the fourth file. Should ask Anik.

Let’s rename column for consistency with other campagnes.

d.nurse.catch <- d.nurse.catch |>
  select(-Campagne) |>
  rename(
    year = Annee,
    trait = Trait,
    species = Espece,
    abundance = Nombre,
    weight_batch = Poids
  )
head(d.nurse.catch)
  year         trait            species abundance weight_batch
1 1980 1980_1_1527_1  Anguilla anguilla         1        0.039
2 1980 1980_1_1527_1    Crangon crangon         1        0.001
3 1980 1980_1_1527_1 Platichthys flesus        36        5.501
4 1980 1980_1_1527_1        Solea solea        16        0.375
5 1980 1980_1_1527_1 Trisopterus luscus         1        0.068
6 1980 1980_1_1528_2  Anguilla anguilla         1        0.162
d.nurse.size <- d.nurse.size |>
  select(-c(Campagne, Poids, Age, Maturite)) |>
  rename(
    year = Annee,
    trait = Trait,
    species = Espece,
    sex = Sexe,
    length = Longueur,
    batch_size = Nombre
  )
head(d.nurse.size)
  year         trait           species sex length batch_size
1 1980 1980_1_1527_1 Anguilla anguilla   N     27          1
2 1980 1980_1_1527_1       Solea solea   N     10          2
3 1980 1980_1_1527_1       Solea solea   N     11          3
4 1980 1980_1_1527_1       Solea solea   N     12          5
5 1980 1980_1_1527_1       Solea solea   N     13          2
6 1980 1980_1_1527_1       Solea solea   N     14          1
d.nurse.trait <- d.nurse.trait |>
  select(-c(Campagne, ProfMoy)) |>
  rename(
    year = Annee,
    trait = Trait,
    month = Mois,
    strate = Strate,
    surface = SurfaceBalayee,
    lat = Lat,
    long = Long
  )
head(d.nurse.trait)
  year         trait month strate  surface     lat    long
1 1980 1980_1_1527_1    10  Loire 0.004722 47.2823 -2.1500
2 1980 1980_1_1528_2    10  Loire 0.004722 47.2822 -2.1333
3 1980 1980_1_1529_3    10  Loire 0.003333 47.2657 -2.1860
4 1980 1980_1_1530_4    10  Loire 0.004887 47.2333 -2.2083
5 1980 1980_1_1531_5    10  Loire 0.004443 47.2533 -2.2450
6 1980 1980_1_1532_6    10  Loire 0.005166 47.2887 -2.0982

2.2. Get general information about the campagne

2.2.1. Evolution of the sampling effort

trait_counts <- d.nurse.trait |>
  group_by(year) |>
  summarise(n_trait = length(unique(trait)))

ggplot(trait_counts, aes(x = year, y = n_trait)) +
  geom_line() +
  geom_point() +
  labs(x = "Year", y = "Number of traits")

surface_year <- d.nurse.trait |>
  group_by(year) |>
  summarise(surface_tot = sum(surface))

ggplot(surface_year, aes(x = year, y = surface_tot)) +
  geom_line() +
  geom_point() +
  labs(x = "Year", y = "Surface sampled")

We see high variability in the sampling effort across the year.

2.2.2. Plot geographical position of traits

trait_sf <- st_as_sf(d.nurse.trait, coords = c("long", "lat"), crs = 4326, remove = FALSE)

leaflet(trait_sf) |>
  addTiles() |>
  addCircleMarkers(
    radius = 2, fillOpacity = 0.7, stroke = FALSE,
    popup = ~ paste("Trait:", trait, "Year:", year)
  )

2.2.3. Fish size measurement rate

d.nurse.batch <- d.nurse.size |>
  group_by(trait, species) |>
  summarise(n_measured = sum(batch_size), .groups = "drop")
head(d.nurse.batch)
# A tibble: 6 × 3
  trait         species            n_measured
  <chr>         <chr>                   <int>
1 1980_1_1527_1 Anguilla anguilla           1
2 1980_1_1527_1 Solea solea                16
3 1980_1_1527_1 Trisopterus luscus          1
4 1980_1_1528_2 Anguilla anguilla           1
5 1980_1_1528_2 Solea solea                82
6 1980_1_1529_3 Anguilla anguilla           1
d.nurse.batch <- left_join(d.nurse.batch, d.nurse.catch, by = c("species", "trait"))
head(d.nurse.batch)
# A tibble: 6 × 6
  trait         species            n_measured  year abundance weight_batch
  <chr>         <chr>                   <int> <int>     <int>        <dbl>
1 1980_1_1527_1 Anguilla anguilla           1  1980         1        0.039
2 1980_1_1527_1 Solea solea                16  1980        16        0.375
3 1980_1_1527_1 Trisopterus luscus          1  1980         1        0.068
4 1980_1_1528_2 Anguilla anguilla           1  1980         1        0.162
5 1980_1_1528_2 Solea solea                82  1980        82        1.70 
6 1980_1_1529_3 Anguilla anguilla           1  1980         1        0.399
n_tot <- nrow(d.nurse.batch)
n_full <- nrow(d.nurse.batch |> filter(n_measured > abundance))
n_full #/ n_tot
[1] 555

For the Nurse campagne, we also have a very high size measurement rate. Let’s investigate the last campagne: SOLPER.

3. Solper campagne

3.1. Clean the data

Let’s load the data.

d.solper.catch <- read.csv(here(data_folder, "captures_SOLPER_2005_2011.csv"), sep = ";")
head(d.solper.catch)
  Campagne Annee Trait  Espece Nombre Poids
1   SOLPER  2005 SPA01 CALMLYR      1   2.9
2   SOLPER  2005 SPA01 ENGRENC    526 575.5
3   SOLPER  2005 SPA01 GOBINIG      1   1.9
4   SOLPER  2005 SPA01 POMOMIN     10  13.4
5   SOLPER  2005 SPA01 SOLESOL      1  31.0
6   SOLPER  2005 SPA01 SPRASPR      7  19.1
d.solper.size <- read.csv(here(data_folder, "tailles_SOLPER_2005_2011.csv"), sep = ";")
head(d.solper.size)
  Campagne Annee Trait  Espece Sexe Maturite Longueur Nombre Poids Age
1   SOLPER  2005 SPA01 SOLESOL    I       NA       16      1    31  NA
2   SOLPER  2005 SPA02 SOLESOL    I       NA        6      1     2  NA
3   SOLPER  2005 SPA02 SOLESOL    I       NA        7      2     8  NA
4   SOLPER  2005 SPA02 SOLESOL    I       NA        8      6    26  NA
5   SOLPER  2005 SPA02 SOLESOL    I       NA        9      3    20  NA
6   SOLPER  2005 SPA02 SOLESOL    I       NA       10      1    10  NA
d.solper.trait <- read.csv(here(data_folder, "traits_SOLPER_2005_2011.csv"), sep = ";")
head(d.solper.trait)
  Campagne Annee Trait Mois Strate SurfaceBalayee      Lat     Long ProfMoy
1   SOLPER  2005 SPA01    8     PA       0.001421 46.14030 -1.22043       6
2   SOLPER  2005 SPA02    8     PA       0.001654 46.14084 -1.18852       6
3   SOLPER  2005 SPA03    8     PA       0.001936 45.81676 -1.20102       6
4   SOLPER  2005 SPA04    8     PA       0.001684 45.79853 -1.19984       7
5   SOLPER  2005 SPA05    8     PA       0.002203 45.79681 -1.23134       8
6   SOLPER  2005 SPA06    9     PA       0.002522 46.13919 -1.25329      13

As for the Nurse campagne, we have one file for the catches, one for the individual sizes, and one for the trait positions.

As before, we will just rename the column for consistency.

d.solper.catch <- d.solper.catch |>
  select(-Campagne) |>
  rename(
    year = Annee,
    trait = Trait,
    species = Espece,
    abundance = Nombre,
    weight_batch = Poids
  )
head(d.solper.catch)
  year trait species abundance weight_batch
1 2005 SPA01 CALMLYR         1          2.9
2 2005 SPA01 ENGRENC       526        575.5
3 2005 SPA01 GOBINIG         1          1.9
4 2005 SPA01 POMOMIN        10         13.4
5 2005 SPA01 SOLESOL         1         31.0
6 2005 SPA01 SPRASPR         7         19.1
d.solper.size <- d.solper.size |>
  select(-c(Campagne, Age, Maturite)) |>
  rename(
    year = Annee,
    trait = Trait,
    species = Espece,
    batch_size = Nombre,
    sex = Sexe,
    length = Longueur,
    weight = Poids
  )
head(d.solper.size)
  year trait species sex length batch_size weight
1 2005 SPA01 SOLESOL   I     16          1     31
2 2005 SPA02 SOLESOL   I      6          1      2
3 2005 SPA02 SOLESOL   I      7          2      8
4 2005 SPA02 SOLESOL   I      8          6     26
5 2005 SPA02 SOLESOL   I      9          3     20
6 2005 SPA02 SOLESOL   I     10          1     10
d.solper.trait <- d.solper.trait |>
  select(-Campagne) |>
  rename(
    year = Annee,
    trait = Trait,
    month = Mois,
    strate = Strate,
    surface = SurfaceBalayee,
    lat = Lat,
    long = Long,
    depth = ProfMoy
  )
head(d.solper.trait)
  year trait month strate  surface      lat     long depth
1 2005 SPA01     8     PA 0.001421 46.14030 -1.22043     6
2 2005 SPA02     8     PA 0.001654 46.14084 -1.18852     6
3 2005 SPA03     8     PA 0.001936 45.81676 -1.20102     6
4 2005 SPA04     8     PA 0.001684 45.79853 -1.19984     7
5 2005 SPA05     8     PA 0.002203 45.79681 -1.23134     8
6 2005 SPA06     9     PA 0.002522 46.13919 -1.25329    13

Now that the data is cleaned let’s look at general information about the campagne.

3.2. Get general information about the campagne

3.2.1. Evolution of sampling effort

trait_year <- d.solper.trait |>
  group_by(year) |>
  summarise(n_trait = length(unique(trait)), surf_tot = sum(surface))

ggplot(trait_year, aes(x = year, y = n_trait, color = surf_tot)) +
  geom_line(color = "grey") +
  geom_point() +
  labs(x = "Year", y = "Number of traits", color = "Total surface")

The see that the sampling effort has been relatively constant, although decreasing with years. Furthermore, as expected, the surface sampled is positively correlated with the number of traits.

3.2.3. Plot trait locations

trait_sf <- st_as_sf(d.solper.trait, coords = c("long", "lat"), crs = 4326, remove = FALSE)

leaflet(trait_sf) |>
  addTiles() |>
  addCircleMarkers(
    radius = 2, fillOpacity = 0.7, stroke = FALSE,
    popup = ~ paste("Trait:", trait, "Year:", year)
  )

3.2.3. Full size sampling

d.solper.batch <- d.solper.size |>
  group_by(species, trait, year) |>
  summarise(n_measured = sum(batch_size), .groups = "drop")

d.solper.batch <- left_join(d.solper.batch, d.solper.catch, by = c("species", "trait", "year"))

n_tot <- nrow(d.solper.batch)
n_full <- nrow(d.solper.batch |> filter(n_measured == abundance))
n_full / n_tot
[1] 1

Wow, it seems that for the Solper campagne every single fish has been measured. It probably means that there has been some filtering about sampled species.