From fe2f8d014f0d40118d8282a212b1b67b2cdcdc7b Mon Sep 17 00:00:00 2001 From: "WUR\\simon083" <wolfram.simon@wur.nl> Date: Thu, 3 Jun 2021 09:51:33 +0200 Subject: [PATCH] Final R script for extracted GEOTIFF (in qgis) of SPAM and GAUL2015 --- Fertilization_rates/Fertilizatio_rates.R | 2 +- .../SPAM2010_GAUL2015_SpatialEx.R | 506 ++++++++++++++++++ .../SPAM2010_geotiff_aggregationADM1.R | 2 +- 3 files changed, 508 insertions(+), 2 deletions(-) create mode 100644 SPAM_Data_Transformation/SPAM2010_GAUL2015_SpatialEx.R diff --git a/Fertilization_rates/Fertilizatio_rates.R b/Fertilization_rates/Fertilizatio_rates.R index ac39625..bd83460 100644 --- a/Fertilization_rates/Fertilizatio_rates.R +++ b/Fertilization_rates/Fertilizatio_rates.R @@ -67,7 +67,7 @@ fertilizer_FAO_EU28 = nutrients_FAO_global %>% filter(., Area %in% cntr_FAO_SPAM # SPAM crop mapping SPAM2010_CropList = read_excel("Input_data/Mappings/SPAM2010_CropList.xlsx", sheet = "crop_map_distinct") %>% - mutate_at(c("short_spam2010"), funs(recode(., 'dveg'='GreenVeg', + mutate_at(c("short_spam2010"), funs(recode(., 'oveg'='GreenVeg', 'oveg'='RestVeg', 'rveg'='RedYelVeg', 'tnut'='tnuts'))) diff --git a/SPAM_Data_Transformation/SPAM2010_GAUL2015_SpatialEx.R b/SPAM_Data_Transformation/SPAM2010_GAUL2015_SpatialEx.R new file mode 100644 index 0000000..82b66bc --- /dev/null +++ b/SPAM_Data_Transformation/SPAM2010_GAUL2015_SpatialEx.R @@ -0,0 +1,506 @@ +# This file is a result of a qgis extraction of all the different geotif maps. +# In qgis I found that the extracted values were closest to the tabular values of the csv SPAM2010. +# Therefore, R and Arc were rejected and zonal statistics was done in qgis with the gaul2015 files as shapes. + + +# Load packages ---------------------------------------------------------- +easypackages::packages(c("tidyverse", 'sf', 'raster','sp', 'spatialEco', + 'exactextractr', 'here', "readxl", "foreign", + "data.table", "rbenchmark", "benchmarkme", + "validate", "forcats", "devtools","magrittr", + "rlist", "largeList", "snow", "foreign", + "reshape2", "margrittr", "stringi")) + + +# Functions --------------------------------------------------------------- +# Functions loading dbf SPAM files +FUNload_list_dbf_SPAM <- function(path, type) { + pattern <- switch (type, + "harv" = "_H_", + "phys" = "_A_", + "prod" = "_P_") # how to add intesnity levels here? Multiple arguments in the pattern? + path <- list.files(path = path, + full.names = TRUE, + pattern = pattern) + l_dbf <- lapply(path, foreign::read.dbf) + names(l_dbf) <- basename(path) + return(l_dbf) +} + +# Functions Treenuts +load_list_raster <- function(path, type) { + pattern <- switch (type, + "yield" = "_YieldPerHectare\\.tif", + "area" = "_HarvestedAreaHectares\\.tif", + "prod" = "_Production\\.tif") + path <- list.files(path = path, + full.names = TRUE, + pattern = pattern) + l_raster <- lapply(path, raster::raster) + # crop_names <- stringr::str_extract(basename(path), "^.+(?=_)") + # names(l_raster) <- crop_names + return(l_raster) +} + +# Zonal statistics with GUAL2015 +FunZone_sum=function(rast){ # raster and vector + c=zonal.stats(shape_gaul2015, rast, stats = c("sum")) + c$ADM0_ADM1<- shape_gaul2015$ADM0_ADM1 # assigning the identifier of the original df to the extracted df + dat <- merge(shape_gaul2015, c, by.x = 'ADM0_ADM1', by.y = 'ADM0_ADM1') # merge the two files together +} + +FUN_select_values = function(DF_list){ + dplyr::select(DF_list, starts_with("sum")) %>% as_tibble() %>% + dplyr::select(-geometry) +} + +# Create factors based on other factors conditionally +FunFactCond = function(df, match_vec){ + df %>% mutate(first_tx = ifelse(name %in% match_vec, "tnuts", "rest")) +} + +# Create factors based on other factors conditionally +FunFactCond_veg = function(df, match_vec){ + df %>% mutate(first_tx = ifelse(name %in% match_green, "dark_green", + ifelse(name %in% match_red, "red_yellow", "other_veg"))) +} + + + +# Load data -------------------------------------------------------------- +SPAMdbf_harv = FUNload_list_dbf_SPAM(path="C:\\Users\\simon083\\OneDrive - Wageningen University & Research\\PhD_WJS\\Academic\\RQ1\\Data_analysis\\Spatial_mod\\Modified_scripts\\R\\CifoS_Crop\\Input_data\\SPAM_qgis_ex_dbf\\", type = "harv") +SPAMdbf_prod = FUNload_list_dbf_SPAM(path="C:\\Users\\simon083\\OneDrive - Wageningen University & Research\\PhD_WJS\\Academic\\RQ1\\Data_analysis\\Spatial_mod\\Modified_scripts\\R\\CifoS_Crop\\Input_data\\SPAM_qgis_ex_dbf\\", type = "prod") +SPAMdbf_phys = FUNload_list_dbf_SPAM(path="C:\\Users\\simon083\\OneDrive - Wageningen University & Research\\PhD_WJS\\Academic\\RQ1\\Data_analysis\\Spatial_mod\\Modified_scripts\\R\\CifoS_Crop\\Input_data\\SPAM_qgis_ex_dbf\\", type = "phys") + +# Data transformation ----------------------------------------------------- +# Clean lists +SPAMdbf_harv = lapply(SPAMdbf_harv, function(df){dplyr::select(df, -c(STR1_YEAR:DISP_AREA, Shape_Leng:Shape_Area))}) +SPAMdbf_prod = lapply(SPAMdbf_prod, function(df){dplyr::select(df, -c(STR1_YEAR:DISP_AREA, Shape_Leng:Shape_Area))}) +SPAMdbf_phys = lapply(SPAMdbf_phys, function(df){dplyr::select(df, -c(STR1_YEAR:DISP_AREA, Shape_Leng:Shape_Area))}) + +# Bind lists together, pull the list names as dataframe column - https://stackoverflow.com/questions/48255351/create-a-column-based-on-the-name-of-the-element-list-that-contain-the-data-fram +df_harv = dplyr::bind_rows(SPAMdbf_harv, .id = "meta_information") %>% as_tibble() %>% + mutate_at(vars(X_zonalsum,X_sum,zonal_sum), ~replace_na(., 0)) %>% rowwise() %>% + mutate(value = X_zonalsum+X_sum+zonal_sum) %>% dplyr::select(-c(zonal_sum:X_mean)) %>% + mutate(rec_type = "H") + +df_prod = dplyr::bind_rows(SPAMdbf_prod, .id = "meta_information") %>% as_tibble() %>% + mutate(rec_type = "P") %>% rename(value = X_zonalsum) + +df_phys = dplyr::bind_rows(SPAMdbf_phys, .id = "meta_information") %>% as_tibble() %>% + mutate_at(vars(X_zonalsum,zonal_sum), ~replace_na(., 0)) %>% rowwise() %>% + mutate(value = X_zonalsum+zonal_sum) %>% dplyr::select(-c(X_zonalsum:zonal_sum)) %>% + mutate(rec_type = "A") + +# Bind dfs togehter +df_rec = dplyr::bind_rows(df_harv, df_prod, df_phys) + +# Add columns with crops and tech type levels and recode the tech levels +SPAM2010_adm1_exdbf = df_rec %>% + separate(meta_information, c("1", "2", "3", "Rec_type", "crop", "tech_type"), sep = "_") %>% + dplyr::select(-c(1, 2, 3)) %>% + mutate_at(c("tech_type"), funs(recode(.,'A.dbf' = 'A', + 'I.dbf' = 'I', + 'R.dbf' = 'R', + 'L.dbf' = 'L', + 'S.dbf' = 'S', + 'H.dbf' = 'H'))) %>% + dplyr::select(-Rec_type) + +write_csv(SPAM2010_adm1_exdbf, here("Input_data", "SPAM2010_adm1_exdbf_global.csv")) +# SPAM2010_adm1_exdbf =read_csv(here("Input_data", "SPAM2010_adm1_exdbf_global.csv")) +# Treenuts ---------------------------------------------------------------- +# Load GAUL2015 ----------------------------------------------------------- + +shape_gaul2015=read_sf(here::here("Input_data", "g2015_2014_1.shp")) +shape_gaul2015$ADM0_ADM1 = paste(shape_gaul2015$ADM0_CODE, shape_gaul2015$ADM1_CODE, sep= "_") +shape_gaul_short_tbl = shape_gaul2015 %>% as_tibble() %>% dplyr::select(-geometry) + +# Load treenut data ------------------------------------------------------- +# Load harvested area and production raster data +rast_harv = load_list_raster(path=here::here("Input_data"), type = "area") +rast_prod = load_list_raster(path=here::here("Input_data"), type = "prod") + +# Zonal statistics Treenuts ----------------------------------------------- +lst_ex_pd = lapply(rast_prod, FunZone_sum) +lst_ex_ha = lapply(rast_harv, FunZone_sum) + +# Select the extraction value columns +lst_values_pd = lapply(lst_ex_pd, FUN_select_values) +lst_values_ha = lapply(lst_ex_ha, FUN_select_values) + +# Merging all lists together to a df +dat_ex_pd = do.call("cbind", lst_values_pd) # merging all tech types +dat_ex_ha = do.call("cbind", lst_values_ha) + +# vector containing all tnuts names +tnuts_vec = c("sum.walnut_Production", "sum.almond_Production","sum.chestnut_Production", + "sum.cashew_Production","sum.pistachio_Production","sum.hazelnut_Production", + "sum.brazil_Production","sum.nutnes_Production") + +# production values tnuts adm1 -------------------------- +treenut_pd = lst_ex_pd[[2]] %>% as_tibble() %>% + dplyr::select(ADM0_ADM1, ADM1_CODE , ADM0_CODE, ADM1_NAME, ADM0_NAME,geometry) %>% + bind_cols(., dat_ex_pd) %>% + dplyr::select(ADM0_ADM1, ADM1_CODE , ADM0_CODE, ADM1_NAME, ADM0_NAME, + matches("^sum\\..+_Production$"), geometry) %>% + group_by(ADM0_CODE,ADM1_CODE) %>% + mutate(treenut_pd = sum(sum.walnut_Production, + sum.almond_Production, + sum.chestnut_Production, + sum.cashew_Production, + sum.pistachio_Production, + sum.hazelnut_Production, + sum.brazil_Production, + sum.nutnes_Production)) %>% + mutate(rest_pd = sum(sum.aniseetc_Production, + sum.chilleetc_Production, + sum.kolanut_Production, + sum.areca_Production, + sum.cinnamon_Production, + sum.ginger_Production, + sum.hop_Production, + sum.mate_Production, + sum.nutmeg_Production, + sum.pepper_Production, + sum.peppermint_Production, + sum.pyrethrum_Production, + sum.ramie_Production, + sum.rubber_Production, + sum.spicenes_Production, + sum.sugarnes_Production, + sum.vanilla_Production)) %>% + dplyr::select(ADM0_ADM1, ADM1_CODE , ADM0_CODE, ADM1_NAME, ADM0_NAME,treenut_pd, rest_pd) + + +# validation of aggregation +# Validate_agg = xcv %>%dplyr::select(tnuts_vec, iso3, name_adm1) %>% +# pivot_longer(cols = starts_with("sum."), +# names_to = "Nuts", +# values_to = "value") + +# calcluating the harvested area values for treenuts -------------------------- +treenut_ha = lst_ex_ha[[2]] %>% as_tibble() %>% + dplyr::select(ADM0_ADM1, ADM1_CODE , ADM0_CODE, ADM1_NAME, ADM0_NAME, geometry) %>% + bind_cols(., dat_ex_ha) %>% + dplyr::select(ADM0_ADM1, ADM1_CODE , ADM0_CODE, ADM1_NAME, ADM0_NAME, + matches("^sum\\..+_HarvestedAreaHectares$"), geometry) %>% + group_by(ADM0_CODE, ADM1_CODE) %>% + mutate(treenut_ha = sum(sum.walnut_HarvestedAreaHectares, + sum.almond_HarvestedAreaHectares, + sum.chestnut_HarvestedAreaHectares, + sum.cashew_HarvestedAreaHectares, + sum.pistachio_HarvestedAreaHectares, + sum.hazelnut_HarvestedAreaHectares, + sum.brazil_HarvestedAreaHectares, + sum.nutnes_HarvestedAreaHectares)) %>% + mutate(rest_ha = sum(sum.aniseetc_HarvestedAreaHectares, + sum.chilleetc_HarvestedAreaHectares, + sum.kolanut_HarvestedAreaHectares, + sum.areca_HarvestedAreaHectares, + sum.cinnamon_HarvestedAreaHectares, + sum.ginger_HarvestedAreaHectares, + sum.hop_HarvestedAreaHectares, + sum.mate_HarvestedAreaHectares, + sum.nutmeg_HarvestedAreaHectares, + sum.pepper_HarvestedAreaHectares, + sum.peppermint_HarvestedAreaHectares, + sum.pyrethrum_HarvestedAreaHectares, + sum.ramie_HarvestedAreaHectares, + sum.rubber_HarvestedAreaHectares, + sum.spicenes_HarvestedAreaHectares, + sum.sugarnes_HarvestedAreaHectares, + sum.vanilla_HarvestedAreaHectares)) %>% + dplyr::select(ADM0_ADM1, ADM1_CODE, ADM0_CODE, ADM1_NAME, ADM0_NAME, treenut_ha, rest_ha) + +# Writing treenut intermediate files +st_write(treenut_pd, here::here("Input_data", "Core_data", "GAUL2015_tnuts_pd.shp"), + driver = "ESRI Shapefile") +st_write(treenut_ha, here::here("Input_data", "Core_data", "GAUL2015_tnuts_ha.shp"), + driver = "ESRI Shapefile") + +# write as csv +write_csv(treenut_ha, here("Input_data", "treenut_ha.csv")) +write_csv(treenut_pd, here("Input_data", "treenut_pd.csv")) + + +prod.tib.adm1 = treenut_pd %>% as_tibble() %>% + dplyr::select(ADM0_ADM1, ADM1_CODE, ADM0_CODE, ADM1_NAME, ADM0_NAME, treenut_pd, rest_pd) %>% + dplyr::group_by(ADM0_CODE,ADM1_CODE) %>% + dplyr::summarise(round(across(.cols = c(treenut_pd, rest_pd), .fns = sum),1)) + +# Harvested area +harv.tib.adm1 = treenut_ha %>% as_tibble() %>% + dplyr::select(ADM0_ADM1, ADM1_CODE, ADM0_CODE, ADM1_NAME, ADM0_NAME, treenut_ha, rest_ha) %>% + dplyr::group_by(ADM0_CODE,ADM1_CODE) %>% + dplyr::summarise(round(across(.cols = c(treenut_ha, rest_ha), .fns = sum),1)) #rounding is kept as in SPAM2010; 1 decimal! + +# Join harvested area and calcluate tree nuts ratio -------------------------- +# Validation: Calulation of tnuts ratio correct! +TnutsRatio_ha_pd_GAUL2015 = treenut_pd %>% + group_by(ADM0_CODE,ADM1_CODE) %>% + left_join(., treenut_ha, by=c("ADM0_CODE","ADM1_CODE")) %>% + rowwise() %>% + mutate(tnuts_ratio_pd = treenut_pd/(treenut_pd + rest_pd), + tnuts_ratio_ha = treenut_ha/(treenut_ha + rest_ha)) %>% + mutate_at(vars(tnuts_ratio_ha, tnuts_ratio_pd), ~replace(., is.nan(.), 0)) %>% + # dplyr::rename(ADM0_CODE = ADM0_CODE.x, ADM1_CODE = ADM1_CODE.x) %>% + dplyr::select(ADM0_CODE,ADM1_CODE, tnuts_ratio_pd, tnuts_ratio_ha) + +# write and read treenuts data ---------------------------------------------- +write_csv(TnutsRatio_ha_pd_GAUL2015, here::here("Input_data", "GAUL2015_ratio_tnuts_ha_pd.csv")) +TnutsRatio_ha_pd_GGS= read_csv(here::here("Input_data", "GAUL2015_ratio_tnuts_ha_pd.csv")) + +# VEGGIES DISAGGREGATION ------------------------------------------------- +# Loading veggie raster files ------------------------------------------------ +# # Functions Veggies +# load_list_raster <- function(path, type) { +# pattern <- switch (type, +# "yield" = "_YieldPerHectare\\.tif", +# "area" = "_HarvestedAreaHectares\\.tif", +# "prod" = "_Production\\.tif") +# path <- list.files(path = path, +# full.names = TRUE, +# pattern = pattern) +# l_raster <- lapply(path, raster::raster) +# # crop_names <- stringr::str_extract(basename(path), "^.+(?=_)") +# # names(l_raster) <- crop_names +# return(l_raster) +# } +# # +# # Exctracting veggies and rest area and production value per adm1 zones +# FunZone_sum=function(rast){ # raster and vector +# c=zonal.stats(ADM1_SPAM_GGS, rast, stats = c("sum")) +# c$name_adm1 <- ADM1_SPAM_GGS$name_adm1 # assigning the identifier of the original df to the extracted df +# dat <- left_join(ADM1_SPAM_GGS, c, by = 'name_adm1') # merge the two files together +# } +# +# FUN_select_values = function(DF_list){ +# dplyr::select(DF_list, starts_with("sum")) %>% as_tibble() %>% +# dplyr::select(-geometry) +# } + + + +# Load treenut data ------------------------------------------------------- +# Load harvested area and production raster data +rast_harv_veg = load_list_raster(path=here::here("Input_data", "Veggies_all"), type = "area") +rast_prod_veg = load_list_raster(path=here::here("Input_data", "Veggies_all"), type = "prod") + +# Zonal statistics Treenuts ----------------------------------------------- +lst_ex_pd_veg = lapply(rast_prod_veg, FunZone_sum) +lst_ex_ha_veg = lapply(rast_harv_veg, FunZone_sum) + +# Select the extraction value columns +lst_values_pd_veg = lapply(lst_ex_pd_veg, FUN_select_values) +lst_values_ha_veg = lapply(lst_ex_ha_veg, FUN_select_values) + +# Merging all lists together to a df +dat_ex_pd_veg = do.call("cbind", lst_values_pd_veg) # merging all tech types +dat_ex_ha_veg = do.call("cbind", lst_values_ha_veg) + +# production values tnuts adm1 -------------------------- +veggie_pd = lst_ex_pd_veg[[2]] %>% as_tibble() %>% + dplyr::select(ADM0_ADM1, ADM1_CODE, ADM0_CODE, ADM1_NAME, ADM0_NAME, geometry) %>% + bind_cols(., dat_ex_pd_veg) %>% + dplyr::select(ADM0_ADM1, ADM1_CODE, ADM0_CODE, ADM1_NAME, ADM0_NAME, + matches("^sum\\..+_Production$"), geometry) %>% + group_by(ADM0_CODE, ADM1_CODE) %>% + mutate(dark_green_pd = sum(sum.spinach_Production, + sum.vetch_Production)) %>% + mutate(red_yellow_pd = sum(sum.tomato_Production, + sum.pumpkinetc_Production, + sum.carrot_Production)) %>% + mutate(other_veg_pd = sum(sum.artichoke_Production, + sum.asparagus_Production, + sum.cabbage_Production, + sum.carob_Production, + sum.cauliflower_Production, + sum.chicory_Production, + sum.cucumberetc_Production, + sum.eggplant_Production, + sum.garlic_Production, + sum.greenbean_Production, + sum.greenbroadbean_Production, + sum.greencorn_Production, + sum.greenonion_Production, + sum.greenpea_Production, + sum.legumenes_Production, + sum.lettuce_Production, + sum.mushroom_Production, + # sum.olive_Production, + sum.onion_Production, + sum.stringbean_Production, + sum.vegetablenes_Production)) %>% + dplyr::select(ADM0_ADM1, ADM1_CODE, ADM0_CODE, ADM1_NAME, ADM0_NAME, + dark_green_pd, red_yellow_pd, other_veg_pd) + + +# validation of veggie aggregation --------------------------------------- +# vector containing all three veggie class names +# veg_dark_vec = c("sum.spinach_Production","sum.vetch_Production") +# veg_red_vec = c("sum.tomato_Production","sum.pumpkinetc_Production", +# "sum.carrot_Production") +# veg_other_vec = c("sum.artichoke_Production","sum.asparagus_Production", +# "sum.cabbage_Production","sum.carob_Production", +# "sum.cauliflower_Production","sum.chicory_Production", +# "sum.cucumberetc_Production","sum.eggplant_Production", +# "sum.garlic_Production","sum.greenbean_Production", +# "sum.greenbroadbean_Production","sum.greencorn_Production", +# "sum.greenonion_Production","sum.greenpea_Production", +# "sum.legumenes_Production","sum.lettuce_Production", +# "sum.mushroom_Production",# sum.olive_Production, +# "sum.onion_Production","sum.stringbean_Production", +# "sum.vegetablenes_Production") +# +# Validate_agg = veggies_pd %>%dplyr::select(tnuts_vec, iso3, name_adm1) %>% +# pivot_longer(cols = starts_with("sum."), +# names_to = "veggies", +# values_to = "value") + +# calculating the harvested area values for veggies -------------------------- +veggie_ha = lst_ex_ha_veg[[2]] %>% as_tibble() %>% + dplyr::select(ADM0_ADM1, ADM1_CODE, ADM0_CODE, ADM1_NAME, ADM0_NAME, geometry) %>% + bind_cols(., dat_ex_ha_veg) %>% + dplyr::select(ADM0_ADM1, ADM1_CODE, ADM0_CODE, ADM1_NAME, ADM0_NAME, geometry, + matches("^sum\\..+_HarvestedAreaHectares$"), geometry) %>% + group_by(ADM0_CODE, ADM1_CODE) %>% + mutate(dark_green_ha = sum(sum.spinach_HarvestedAreaHectares, + sum.vetch_HarvestedAreaHectares)) %>% + mutate(red_yellow_ha = sum(sum.tomato_HarvestedAreaHectares, + sum.pumpkinetc_HarvestedAreaHectares, + sum.carrot_HarvestedAreaHectares)) %>% + mutate(other_veg_ha = sum(sum.artichoke_HarvestedAreaHectares, + sum.asparagus_HarvestedAreaHectares, + sum.cabbage_HarvestedAreaHectares, + sum.carob_HarvestedAreaHectares, + sum.cauliflower_HarvestedAreaHectares, + sum.chicory_HarvestedAreaHectares, + sum.cucumberetc_HarvestedAreaHectares, + sum.eggplant_HarvestedAreaHectares, + sum.garlic_HarvestedAreaHectares, + sum.greenbean_HarvestedAreaHectares, + sum.greenbroadbean_HarvestedAreaHectares, + sum.greencorn_HarvestedAreaHectares, + sum.greenonion_HarvestedAreaHectares, + sum.greenpea_HarvestedAreaHectares, + sum.legumenes_HarvestedAreaHectares, + sum.lettuce_HarvestedAreaHectares, + sum.mushroom_HarvestedAreaHectares, + # sum.olive_HarvestedAreaHectares, + sum.onion_HarvestedAreaHectares, + sum.stringbean_HarvestedAreaHectares, + sum.vegetablenes_HarvestedAreaHectares)) %>% + dplyr::select(ADM0_ADM1, ADM1_CODE, ADM0_CODE, ADM1_NAME, ADM0_NAME, + dark_green_ha, red_yellow_ha, other_veg_ha) + + +# write and read veggie geo data ------------------------------------------- +st_write(veggie_pd, here::here("Input_data", "Core_data", "GAUL2015_veggie_pd.shp"), + driver = "ESRI Shapefile") +st_write(veggie_ha, here::here("Input_data", "Core_data", "GAUL2015_veggie_ha.shp"), + driver = "ESRI Shapefile") + +# write as csv +write_csv(veggie_ha, here("Input_data", "veggie_ha.csv")) +write_csv(veggie_pd, here("Input_data", "veggie_pd.csv")) + +# Join harvested area and calcluate veggie ratio -------------------------- +# Validation: Calulation of veggies ratio correct! adds up to 1. +ratio_veg_ha_pd = veggie_pd %>% + group_by(ADM0_CODE, ADM1_CODE) %>% + left_join(., veggie_ha, by=c("ADM0_CODE","ADM1_CODE")) %>% + rowwise() %>% + mutate(DarkGreen_ratio_pd = dark_green_pd/(dark_green_pd+red_yellow_pd+other_veg_pd), + DarkGreen_ratio_ha= dark_green_ha/(dark_green_ha+red_yellow_ha+other_veg_ha), + RedYellow_ratio_pd= red_yellow_pd/(dark_green_pd+red_yellow_pd+other_veg_pd), + RedYellow_ratio_ha= red_yellow_ha/(dark_green_ha+red_yellow_ha+other_veg_ha), + OtherVeg_ratio_pd= other_veg_pd/(dark_green_pd+red_yellow_pd+other_veg_pd), + OtherVeg_ratio_ha= other_veg_ha/(dark_green_ha+red_yellow_ha+other_veg_ha)) %>% + mutate_at(vars(DarkGreen_ratio_pd, DarkGreen_ratio_ha, + RedYellow_ratio_pd, RedYellow_ratio_ha, + OtherVeg_ratio_pd, OtherVeg_ratio_ha), ~replace(., is.nan(.), 0)) %>% + # rename(ADM0_CODE = ADM0_CODE, + # ADM1_CODE = ADM1_CODE) %>% + dplyr::select(ADM1_CODE, ADM0_CODE,, DarkGreen_ratio_pd, DarkGreen_ratio_ha, RedYellow_ratio_pd, + RedYellow_ratio_ha, OtherVeg_ratio_pd, OtherVeg_ratio_ha) + +# write and read veggie data ---------------------------------------------- +write_csv(ratio_veg_ha_pd, here::here("Input_data", "GAUL2015_ratio_veg_ha_pd.csv")) +ratio_veg_ha_pd = read_csv(here::here("Input_data", "GAUL2015_ratio_veg_ha_pd.csv")) + + +# Dissagregating treenuts and Veggies ------------------------------------- + +# Apply tnut and veggie ratio to SPAM2010 ----------------------------------------------------- +SPAM2010_global_Gaul2015 = SPAM2010_adm1_exdbf %>% + group_by(ADM1_CODE, ADM0_CODE) %>% + left_join(., TnutsRatio_ha_pd_GAUL2015, by=c("ADM1_CODE", "ADM0_CODE")) %>% + left_join(., ratio_veg_ha_pd, by=c("ADM1_CODE", "ADM0_CODE")) %>% + pivot_wider(names_from = "crop", values_from = "value") %>% + mutate(TNUT = case_when(rec_type == "P" ~ REST*tnuts_ratio_pd, TRUE ~ REST), + TNUT = case_when(rec_type == c("H","A") ~ REST*tnuts_ratio_ha, TRUE ~ TNUT), #TRUE ~ TNUT is important as otherwise (~REST) would overrite the TNUT values from line above. + REST = case_when(rec_type == "P" ~ REST * (1-tnuts_ratio_pd), TRUE ~ REST), + REST = case_when(rec_type == c("H","A") ~ REST * (1-tnuts_ratio_ha), TRUE ~ REST), + DVEG = case_when(rec_type == "P" ~ VEGE * DarkGreen_ratio_pd, TRUE ~ 0), + DVEG = case_when(rec_type == c("H","A") ~ VEGE * DarkGreen_ratio_ha, TRUE ~ DVEG), + OVEG = case_when(rec_type == "P" ~ VEGE*OtherVeg_ratio_pd, TRUE ~ 0), + OVEG = case_when(rec_type == c("H","A") ~ VEGE* OtherVeg_ratio_ha, TRUE ~ OVEG), + RVEG = case_when(rec_type == "P" ~ VEGE*RedYellow_ratio_pd, TRUE ~ 0), + RVEG = case_when(rec_type == c("H","A") ~ VEGE* RedYellow_ratio_ha, TRUE ~ RVEG))%>% + relocate(TNUT, .before = REST) %>% #shifting columns + relocate(DVEG, .before = REST) %>% + relocate(RVEG, .before = REST) %>% + relocate(OVEG, .before = REST) %>% + dplyr::select(-c(tnuts_ratio_pd:tnuts_ratio_ha, DarkGreen_ratio_pd, DarkGreen_ratio_ha, + RedYellow_ratio_ha,RedYellow_ratio_pd, OtherVeg_ratio_ha, OtherVeg_ratio_pd, VEGE)) %>% #IMPORTANT: Deleting VEGE to not double count it when rbinnding it with + pivot_longer(cols = ACOF:YAMS, values_to = "value") %>% + pivot_wider(names_from = "rec_type", values_from = "value") + +# Write importnat files to disc +write_csv(SPAM2010_global_Gaul2015, here("Input_data", "SPAM2010_global_nuts_veg_ADM1_aul2015ex.csv")) + + +# Subsetting ------------------------------------------------------------- +EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA", + "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR", "CYP") + +# EU_cntr_Cifos = c("Austria","Belgium","Bulgaria","Croatia","Cyprus", "Czechia","Denmark", +# "Estonia","Finland","France","Germany","Greece","Hungary","Ireland", +# "Italy","Latvia","Lithuania","Luxembourg","Malta","Netherlands","Poland", +# "Portugal","Romania","Slovakia","Slovenia","Spain","Sweden",'United Kingdom') + +# Mapping files ---------------------------------------------------------- +# Country +dat_map_SPAM_GAUL_CIFOS_FAO = read.csv(here("Input_data", "Mappings", "dat_map_SPAM_GAUL_CIFOS_FAO.csv")) %>% as_tibble() +SPAM_GAUL_glo_MAP = SPAM2010_global_Gaul2015 %>% left_join(., dat_map_SPAM_GAUL_CIFOS_FAO, by = c("ADM0_CODE"="ADM0_CODE_GAUL")) + +# Crops + + +# SPAM crop mapping +SPAM2010_CropList = read_excel("Input_data/Mappings/SPAM2010_CropList.xlsx", + sheet = "crop_map_distinct") + +# Subset EU28 - ADding yields, renaming, mapping names +SPAM_GAUL_EU28 = SPAM_GAUL_glo_MAP %>% filter(., iso3_SPAM %in% EU_iso3) %>% + dplyr::select(iso3_SPAM, ADM1_CODE, ADM0_CODE, ADM0_NAME, ADM1_NAME, name_cntr_CIFOS2021, name, tech_type, H, P, A) %>% + rename(HarvestedArea_ha = H, + PhysicalArea_ha = A, + Production_mt = P, + crop = name) %>% + mutate(Yield_kgha = round((Production_mt*1000/HarvestedArea_ha),1))%>% #checked for result. We are transforming mt to kg when multiplying Production by 1000 to get unit kg/ha for yields + mutate_at(vars(Yield_kgha), ~replace(., is.nan(.), 0)) %>% + mutate(Production_mt = ifelse(is.infinite(Yield_kgha), 0, Production_mt)) %>% #If Yields show inf values P is changed to 0. + mutate(Yield_kgha = ifelse(is.infinite(Yield_kgha),0, Yield_kgha)) %>% #inf values in Y column are changed to 0's + mutate(across(c(HarvestedArea_ha,PhysicalArea_ha,Production_mt,Yield_kgha), round, 1)) %>% + mutate(crop = tolower(crop)) %>% # convert to lower case + left_join(., SPAM2010_CropList, by = c("crop" = "short_spam2010")) %>% + rename("country_cifos" = "name_cntr_CIFOS2021", + "crops_cifos" = "cifos_crops", + "crops_spam" = "crop", + "crops_fao" = "name_fao", + iso3 = iso3_SPAM) %>% + dplyr::select(iso3, ADM0_CODE, ADM1_CODE, country_cifos, ADM1_NAME, crops_cifos, crops_spam, tech_type, HarvestedArea_ha, PhysicalArea_ha, Production_mt, Yield_kgha) + +write.csv(SPAM_GAUL_EU28, here("Input_data", "Core_data", "SPAM_GAUL2015_EU28.csv")) diff --git a/SPAM_Data_Transformation/SPAM2010_geotiff_aggregationADM1.R b/SPAM_Data_Transformation/SPAM2010_geotiff_aggregationADM1.R index d83a52c..21c3190 100644 --- a/SPAM_Data_Transformation/SPAM2010_geotiff_aggregationADM1.R +++ b/SPAM_Data_Transformation/SPAM2010_geotiff_aggregationADM1.R @@ -1255,7 +1255,7 @@ VeggieFun_area = function(spam){ dplyr::select(-VEGE) } -Fun_wide = function(dat){ +Fun_swide = function(dat){ dat %>%pivot_wider(names_from = crop, values_from = valu) } -- GitLab