Get diet table from fishbase

Summary

In this notebook we

  • extract a diet table using fishbase

0. Setup

library(renv)
renv::load()
renv::restore()
renv::status()
library(here)
library(DiagrammeR)
library(targets)
library(dplyr)
library(rfishbase)
library(riversea)

1. Extract the diet table

First, we want to get the list of species present in our dataset.

data <- tar_read(sea_data_imputed)
species_list <- data$size |>
  pull(species_valid) |>
  unique()
head(species_list)
[1] "Abramis brama"           "Acipenser sturio"       
[3] "Agonus cataphractus"     "Alburnoides bipunctatus"
[5] "Alburnus alburnus"       "Alosa alosa"            

We have 176 species in our data set. We want to retrive their diet for each of them.

We assume the following food web structure.

DiagrammeR::grViz(readLines(here("figures", "foodweb.dot")))

Ellipses represent broad basal categories on which fishes feed on. Our goal here is to determine for each fish species on which of these categories it feeds, and whether it is piscivorous or not. To do so we scrap fishbase.

items <- rfishbase::fb_tbl("diet_items")
diet_species <- rfishbase::diet(species_list)
diet_category <- diet_species |>
  left_join(items, by = "DietCode") |>
  select(
    Species,
    SampleStage,
    DietCode,
    FoodI,
    FoodII,
    FoodIII,
    ItemName,
    Stage,
    DietPercent
  )
head(diet_category)
# A tibble: 6 × 9
  Species   SampleStage DietCode FoodI FoodII FoodIII ItemName Stage DietPercent
  <chr>     <chr>          <int> <chr> <chr>  <chr>   <chr>    <chr>       <dbl>
1 Abramis … adults           383 zoob… benth… amphip… amphipo… juv.…      16    
2 Abramis … adults           383 zoob… benth… benth.… benth. … juv.…       4.76 
3 Abramis … adults           383 plan… other… benthi… benthic… n.a.…       0.430
4 Abramis … adults           383 zoob… mollu… bivalv… bivalves juv.…       5.08 
5 Abramis … adults           383 detr… detri… debris  debris   n.a.…      18.7  
6 Abramis … adults           383 zoob… insec… insects insects  larv…      40    

We see that the food is described by the three columns FoodI, FoodII and FoodIII. The question is how we map our categories using those.

diet_category |>
  pull(FoodI) |>
  unique()
[1] "zoobenthos"  "plants"      "detritus"    "zooplankton" "others"     
[6] "nekton"     

Here we see that only the “plants” and “others” category cannot be classified directly. Nekton correspond to other fishes so it implies piscivory.

We can go further in the classification for the ambiguous categories.

diet_category |>
  filter(FoodI == "plants") |>
  select(FoodII, FoodIII) |>
  distinct()
# A tibble: 7 × 2
  FoodII        FoodIII                 
  <chr>         <chr>                   
1 other plants  benthic algae/weeds     
2 other plants  periphyton              
3 other plants  terrestrial plants      
4 phytoplankton n.a./other phytoplankton
5 phytoplankton diatoms                 
6 phytoplankton green algae             
7 phytoplankton dinoflagellates         

So using these subcategories we can categorize fish diet using our categories

  • benthic algae > macrophytes;
  • periphyton > biofilm;
  • terrestrial plants > macrophytes;
  • phytoplankton > phytoplantkon.
diet_category |>
  filter(FoodI == "others") |>
  select(FoodII, FoodIII) |>
  distinct()
# A tibble: 4 × 2
  FoodII FoodIII         
  <chr>  <chr>           
1 others n.a./others     
2 birds  sea birds       
3 birds  n.a./other birds
4 herps  toads/frogs     

These correspond to categories we do not consider in our framework, se we choose to ignore them.

Like this we can extract a diet table

diet_table <- tar_read(diet)
head(diet_table)
# A tibble: 6 × 3
  species       stage  prey_category
  <chr>         <chr>  <chr>        
1 Abramis brama adults detritus     
2 Abramis brama adults fish         
3 Abramis brama adults insect       
4 Abramis brama adults crustacean   
5 Abramis brama adults worm         
6 Abramis brama adults zooplankton  

which for each species, and life stage, tells what it eats. We can also have the table a in “wide” format, where we have one column for each basal category.

diet_table_wide <- tar_read(diet_wide)
head(diet_table_wide)
# A tibble: 6 × 13
  species   stage biofilm crustacean detritus echinoderm  fish insect macrophyte
  <chr>     <chr>   <dbl>      <dbl>    <dbl>      <dbl> <dbl>  <dbl>      <dbl>
1 Abramis … recr…       0          1        0          0     0      1          0
2 Abramis … juv.…       0          1        1          0     0      1          1
3 Abramis … adul…       0          1        1          0     1      1          0
4 Acipense… recr…       0          1        0          0     1      1          0
5 Acipense… juv.…       0          1        0          0     1      0          0
6 Agonus c… adul…       0          1        0          0     0      0          0
# ℹ 4 more variables: mollusk <dbl>, phytoplankton <dbl>, worm <dbl>,
#   zooplankton <dbl>

2. Postprocess diet table

2.1. Add larvae stage

Next, we want to add a larvae stage for all species.

d_larvae <- diet_table_wide |>
  group_by(species) |>
  summarise(has_larvae = "larvae" %in% stage, .groups = "drop")
d_larvae |> count(has_larvae)
# A tibble: 2 × 2
  has_larvae     n
  <lgl>      <int>
1 FALSE        157
2 TRUE          19

So we see that for most species, they do not have a larvae stage. We can add them with a utility function. We assume that larvae only feed on zooplankton.

diet_with_larvae <- add_larvae(diet_table_wide)
diet_with_larvae |> filter(species == sample(species, 1))
# A tibble: 2 × 13
  species   stage biofilm crustacean detritus echinoderm  fish insect macrophyte
  <chr>     <chr>   <dbl>      <dbl>    <dbl>      <dbl> <dbl>  <dbl>      <dbl>
1 Barbatul… larv…       0          0        0          0     0      0          0
2 Barbatul… juv.…       0          1        0          0     0      1          0
# ℹ 4 more variables: mollusk <dbl>, phytoplankton <dbl>, worm <dbl>,
#   zooplankton <dbl>

2.2. Remove rare species

To focus on important species for the community we study we filter for very rare species. Let’s have a look.

sea_data <- tar_read(sea_data_imputed)
occurences <- sea_data$catch |>
  group_by(species_valid) |>
  summarise(occurence = n(), .groups = "drop") |>
  arrange(occurence)
occ_min <- 10
count_occ <- occurences |> count(occurence >= occ_min)
n_remaining <- pull(count_occ, n)[2]
count_occ
# A tibble: 2 × 2
  `occurence >= occ_min`     n
  <lgl>                  <int>
1 FALSE                     69
2 TRUE                     107

We see that we have many species have a low occurence. For example, if you choose a threshold of 10, we have only 107 species that remain in the data set.

We created an utility function to remove rare species from the diet table.

diet_no_rare <- tar_read(diet_no_rare)
head(diet_no_rare)
# A tibble: 6 × 13
  species   stage biofilm crustacean detritus echinoderm  fish insect macrophyte
  <chr>     <chr>   <dbl>      <dbl>    <dbl>      <dbl> <dbl>  <dbl>      <dbl>
1 Abramis … recr…       0          1        0          0     0      1          0
2 Abramis … larv…       0          0        0          0     0      0          0
3 Abramis … juv.…       0          1        1          0     0      1          1
4 Abramis … adul…       0          1        1          0     1      1          0
5 Agonus c… larv…       0          0        0          0     0      0          0
6 Agonus c… adul…       0          1        0          0     0      0          0
# ℹ 4 more variables: mollusk <dbl>, phytoplankton <dbl>, worm <dbl>,
#   zooplankton <dbl>

We can check if we have species with missing diet.

diet_no_rare |>
  filter(is.na(stage)) |>
  pull(species)
[1] "Engraulis maeoticus" "Raja microocellata"  "Symphodus bailloni" 

3. Life stage lengths

library(rfishbase)

species_list <- diet_no_rare |>
  pull(species) |>
  unique()
d_maturity <- maturity(species_list) |>
  select(Species, Lm) |>
  filter(!is.na(Lm)) |>
  group_by(Species) |>
  summarise(
    Lm_avg = mean(Lm),
    .groups = "drop"
  )
head(d_maturity)
# A tibble: 6 × 2
  Species            Lm_avg
  <chr>               <dbl>
1 Abramis brama       28.8 
2 Alburnus alburnus    9.90
3 Alosa alosa         42.5 
4 Alosa fallax        28.9 
5 Ammodytes tobianus  13   
6 Anguilla anguilla   50.8 
species_missing <- setdiff(species_list, d_maturity$Species)
maturity("Barbus barbus")
# A tibble: 4 × 41
  Species    SpecCode autoctr StockCode MaturityRefNo Sex   AgeMatMin AgeMatMin2
  <chr>         <int>   <int>     <int>         <int> <chr>     <dbl>      <dbl>
1 Barbus ba…     4472    2212      4670          6258 fema…         4          5
2 Barbus ba…     4472    2213      4670          6258 male          3          4
3 Barbus ba…     4472    7184      4670         59043 fema…         3          7
4 Barbus ba…     4472    5919      4670         59043 male          2          5
# ℹ 33 more variables: AgeMatRef <int>, tm <dbl>, Number <int>, r2 <dbl>,
#   SE_tm <dbl>, SD_tm <dbl>, LCL_tm <dbl>, UCL_tm <dbl>, LengthMatMin <dbl>,
#   LengthMatMin2 <dbl>, Type1 <chr>, LengthMatRef <int>, Lm <dbl>,
#   LCL_Lm <dbl>, UCL_Lm <dbl>, LmaxLm <dbl>, LmaxLmType <chr>,
#   LmaxLmRef <int>, LmaxStudy <dbl>, LmaxStudyType <chr>, SE_Lm <dbl>,
#   SD_Lm <dbl>, Locality <chr>, C_Code <chr>, E_CODE <int>, Comment <chr>,
#   Entered <int>, DateEntered <dttm>, Modified <int>, DateModified <dttm>, …
size_extrema <- tar_read(size_extrema) |> rename(species = species_valid)
maturity_length <- tar_read(maturity_length)
sizes <- inner_join(maturity_length, size_extrema, by = "species")
head(sizes)
# A tibble: 6 × 4
  species            maturity_length length_min length_max
  <chr>                        <dbl>      <dbl>      <dbl>
1 Abramis brama                28.8        1.8        56  
2 Alburnus alburnus             9.90       1.2        15.6
3 Alosa alosa                  42.5        4.2        36.5
4 Alosa fallax                 28.9        2.8        40  
5 Ammodytes tobianus           13          3.2        27  
6 Anguilla anguilla            50.8        2.84       96  
sizes |> filter(maturity_length > length_max | maturity_length < length_min)
# A tibble: 8 × 4
  species                 maturity_length length_min length_max
  <chr>                             <dbl>      <dbl>      <dbl>
1 Alosa alosa                        42.5        4.2       36.5
2 Chelidonichthys cuculus            26.1        7         23  
3 Clupea harengus                    22.5        2.7       22.5
4 Diplodus sargus                    18.3        3.5       13.9
5 Lophius piscatorius                54.9       13         27  
6 Pollachius pollachius              39.4        3.8       35  
7 Raja microocellata                 71.7       13         71  
8 Sparus aurata                      30.8        2.5       26.5

Maybe only juveniles are captured?

diet_full <- tar_read(diet_size)
head(diet_full)
# A tibble: 6 × 15
  species     length_min length_max stage biofilm crustacean detritus echinoderm
  <chr>            <dbl>      <dbl> <chr>   <dbl>      <dbl>    <dbl>      <dbl>
1 Abramis br…       0          2.00 larv…       0          0        0          0
2 Abramis br…       2.00       2.00 juv.…       0          1        1          0
3 Abramis br…       2.00     Inf    adul…       0          1        1          0
4 Agonus cat…       0          2.00 larv…       0          0        0          0
5 Agonus cat…       2.00     Inf    comb…       0          1        0          0
6 Alburnus a…       0          2.00 larv…       0          0        0          0
# ℹ 7 more variables: fish <dbl>, insect <dbl>, macrophyte <dbl>,
#   mollusk <dbl>, phytoplankton <dbl>, worm <dbl>, zooplankton <dbl>