From fe2f8d014f0d40118d8282a212b1b67b2cdcdc7b Mon Sep 17 00:00:00 2001
From: "WUR\\simon083" <wolfram.simon@wur.nl>
Date: Thu, 3 Jun 2021 09:51:33 +0200
Subject: [PATCH] Final R script for extracted GEOTIFF (in qgis) of SPAM and
 GAUL2015

---
 Fertilization_rates/Fertilizatio_rates.R      |   2 +-
 .../SPAM2010_GAUL2015_SpatialEx.R             | 506 ++++++++++++++++++
 .../SPAM2010_geotiff_aggregationADM1.R        |   2 +-
 3 files changed, 508 insertions(+), 2 deletions(-)
 create mode 100644 SPAM_Data_Transformation/SPAM2010_GAUL2015_SpatialEx.R

diff --git a/Fertilization_rates/Fertilizatio_rates.R b/Fertilization_rates/Fertilizatio_rates.R
index ac39625..bd83460 100644
--- a/Fertilization_rates/Fertilizatio_rates.R
+++ b/Fertilization_rates/Fertilizatio_rates.R
@@ -67,7 +67,7 @@ fertilizer_FAO_EU28 = nutrients_FAO_global %>% filter(., Area %in% cntr_FAO_SPAM
 # SPAM crop mapping
 SPAM2010_CropList = read_excel("Input_data/Mappings/SPAM2010_CropList.xlsx", 
                                sheet = "crop_map_distinct") %>% 
-  mutate_at(c("short_spam2010"), funs(recode(., 'dveg'='GreenVeg',
+  mutate_at(c("short_spam2010"), funs(recode(., 'oveg'='GreenVeg',
                                              'oveg'='RestVeg',
                                              'rveg'='RedYelVeg',
                                              'tnut'='tnuts')))
diff --git a/SPAM_Data_Transformation/SPAM2010_GAUL2015_SpatialEx.R b/SPAM_Data_Transformation/SPAM2010_GAUL2015_SpatialEx.R
new file mode 100644
index 0000000..82b66bc
--- /dev/null
+++ b/SPAM_Data_Transformation/SPAM2010_GAUL2015_SpatialEx.R
@@ -0,0 +1,506 @@
+# This file is a result of a qgis extraction of all the different geotif maps. 
+# In qgis I found that the extracted values were closest to the tabular values of the csv SPAM2010.
+# Therefore, R and Arc were rejected and zonal statistics was done in qgis with the gaul2015 files as shapes.
+
+
+# Load packages  ----------------------------------------------------------
+easypackages::packages(c("tidyverse", 'sf', 'raster','sp', 'spatialEco',
+                         'exactextractr', 'here', "readxl", "foreign", 
+                         "data.table", "rbenchmark", "benchmarkme",
+                         "validate", "forcats", "devtools","magrittr", 
+                         "rlist", "largeList", "snow", "foreign",
+                         "reshape2", "margrittr", "stringi"))
+
+
+# Functions ---------------------------------------------------------------
+# Functions loading dbf SPAM files
+FUNload_list_dbf_SPAM <- function(path, type) {
+  pattern <- switch (type,
+                     "harv" = "_H_",
+                     "phys" = "_A_",
+                     "prod" = "_P_") # how to add intesnity levels here? Multiple arguments in the pattern? 
+  path <- list.files(path = path, 
+                     full.names = TRUE,
+                     pattern = pattern)
+  l_dbf <- lapply(path, foreign::read.dbf)
+  names(l_dbf) <- basename(path)
+  return(l_dbf)
+}
+
+# Functions Treenuts
+load_list_raster <- function(path, type) {
+  pattern <- switch (type,
+                     "yield" = "_YieldPerHectare\\.tif",
+                     "area" = "_HarvestedAreaHectares\\.tif",
+                     "prod" = "_Production\\.tif")
+  path <- list.files(path = path, 
+                     full.names = TRUE,
+                     pattern = pattern)
+  l_raster <- lapply(path, raster::raster)
+  # crop_names <- stringr::str_extract(basename(path), "^.+(?=_)")
+  # names(l_raster) <- crop_names
+  return(l_raster)
+}
+
+# Zonal statistics with GUAL2015
+FunZone_sum=function(rast){ # raster and vector
+  c=zonal.stats(shape_gaul2015, rast, stats = c("sum")) 
+  c$ADM0_ADM1<- shape_gaul2015$ADM0_ADM1  # assigning the identifier of the original df to the extracted df
+  dat <- merge(shape_gaul2015, c, by.x = 'ADM0_ADM1', by.y = 'ADM0_ADM1') # merge the two files together
+}
+
+FUN_select_values = function(DF_list){
+  dplyr::select(DF_list, starts_with("sum")) %>% as_tibble() %>% 
+    dplyr::select(-geometry)
+}
+
+# Create factors based on other factors conditionally
+FunFactCond = function(df, match_vec){
+  df %>% mutate(first_tx = ifelse(name %in% match_vec, "tnuts", "rest"))
+}
+
+# Create factors based on other factors conditionally
+FunFactCond_veg = function(df, match_vec){
+  df %>% mutate(first_tx = ifelse(name %in% match_green, "dark_green", 
+                                  ifelse(name %in% match_red, "red_yellow", "other_veg")))
+}
+
+
+
+# Load data  --------------------------------------------------------------
+SPAMdbf_harv = FUNload_list_dbf_SPAM(path="C:\\Users\\simon083\\OneDrive - Wageningen University & Research\\PhD_WJS\\Academic\\RQ1\\Data_analysis\\Spatial_mod\\Modified_scripts\\R\\CifoS_Crop\\Input_data\\SPAM_qgis_ex_dbf\\", type = "harv")
+SPAMdbf_prod = FUNload_list_dbf_SPAM(path="C:\\Users\\simon083\\OneDrive - Wageningen University & Research\\PhD_WJS\\Academic\\RQ1\\Data_analysis\\Spatial_mod\\Modified_scripts\\R\\CifoS_Crop\\Input_data\\SPAM_qgis_ex_dbf\\", type = "prod")
+SPAMdbf_phys = FUNload_list_dbf_SPAM(path="C:\\Users\\simon083\\OneDrive - Wageningen University & Research\\PhD_WJS\\Academic\\RQ1\\Data_analysis\\Spatial_mod\\Modified_scripts\\R\\CifoS_Crop\\Input_data\\SPAM_qgis_ex_dbf\\", type = "phys")
+
+# Data transformation -----------------------------------------------------
+# Clean lists
+SPAMdbf_harv = lapply(SPAMdbf_harv, function(df){dplyr::select(df, -c(STR1_YEAR:DISP_AREA, Shape_Leng:Shape_Area))})
+SPAMdbf_prod = lapply(SPAMdbf_prod, function(df){dplyr::select(df, -c(STR1_YEAR:DISP_AREA, Shape_Leng:Shape_Area))})
+SPAMdbf_phys = lapply(SPAMdbf_phys, function(df){dplyr::select(df, -c(STR1_YEAR:DISP_AREA, Shape_Leng:Shape_Area))})
+
+# Bind lists together, pull the list names as dataframe column  - https://stackoverflow.com/questions/48255351/create-a-column-based-on-the-name-of-the-element-list-that-contain-the-data-fram
+df_harv = dplyr::bind_rows(SPAMdbf_harv, .id = "meta_information") %>% as_tibble() %>% 
+  mutate_at(vars(X_zonalsum,X_sum,zonal_sum), ~replace_na(., 0)) %>% rowwise() %>% 
+  mutate(value = X_zonalsum+X_sum+zonal_sum) %>% dplyr::select(-c(zonal_sum:X_mean)) %>% 
+  mutate(rec_type = "H")
+
+df_prod = dplyr::bind_rows(SPAMdbf_prod, .id = "meta_information") %>% as_tibble() %>% 
+  mutate(rec_type = "P") %>%  rename(value = X_zonalsum)
+
+df_phys = dplyr::bind_rows(SPAMdbf_phys, .id = "meta_information") %>% as_tibble() %>% 
+  mutate_at(vars(X_zonalsum,zonal_sum), ~replace_na(., 0)) %>% rowwise() %>% 
+  mutate(value = X_zonalsum+zonal_sum) %>% dplyr::select(-c(X_zonalsum:zonal_sum)) %>% 
+  mutate(rec_type = "A")
+
+# Bind dfs togehter
+df_rec = dplyr::bind_rows(df_harv, df_prod, df_phys) 
+
+# Add columns with crops and tech type levels and recode the tech levels
+SPAM2010_adm1_exdbf = df_rec %>% 
+  separate(meta_information, c("1", "2", "3", "Rec_type", "crop", "tech_type"), sep = "_") %>% 
+  dplyr::select(-c(1, 2, 3)) %>% 
+  mutate_at(c("tech_type"), funs(recode(.,'A.dbf'  = 'A',
+                                        'I.dbf'  = 'I',
+                                        'R.dbf'  = 'R',
+                                        'L.dbf'  = 'L',
+                                        'S.dbf'  = 'S',
+                                        'H.dbf'  = 'H'))) %>% 
+  dplyr::select(-Rec_type)
+
+write_csv(SPAM2010_adm1_exdbf, here("Input_data", "SPAM2010_adm1_exdbf_global.csv"))
+# SPAM2010_adm1_exdbf  =read_csv(here("Input_data", "SPAM2010_adm1_exdbf_global.csv"))
+# Treenuts ----------------------------------------------------------------
+# Load GAUL2015 -----------------------------------------------------------
+
+shape_gaul2015=read_sf(here::here("Input_data", "g2015_2014_1.shp")) 
+shape_gaul2015$ADM0_ADM1 = paste(shape_gaul2015$ADM0_CODE, shape_gaul2015$ADM1_CODE, sep= "_")
+shape_gaul_short_tbl = shape_gaul2015 %>%  as_tibble() %>% dplyr::select(-geometry)
+
+# Load treenut data -------------------------------------------------------
+# Load harvested area and production raster data
+rast_harv = load_list_raster(path=here::here("Input_data"), type = "area")
+rast_prod = load_list_raster(path=here::here("Input_data"), type = "prod")
+
+# Zonal statistics Treenuts -----------------------------------------------
+lst_ex_pd = lapply(rast_prod, FunZone_sum)
+lst_ex_ha = lapply(rast_harv, FunZone_sum)
+
+# Select the extraction value columns
+lst_values_pd = lapply(lst_ex_pd, FUN_select_values)
+lst_values_ha = lapply(lst_ex_ha, FUN_select_values)
+
+# Merging all lists together to a df
+dat_ex_pd = do.call("cbind", lst_values_pd) # merging all tech types
+dat_ex_ha = do.call("cbind", lst_values_ha)
+
+# vector containing all tnuts names 
+tnuts_vec = c("sum.walnut_Production", "sum.almond_Production","sum.chestnut_Production",
+              "sum.cashew_Production","sum.pistachio_Production","sum.hazelnut_Production",
+              "sum.brazil_Production","sum.nutnes_Production")
+
+# production values tnuts adm1 --------------------------
+treenut_pd = lst_ex_pd[[2]] %>% as_tibble() %>% 
+  dplyr::select(ADM0_ADM1, ADM1_CODE , ADM0_CODE, ADM1_NAME, ADM0_NAME,geometry) %>% 
+  bind_cols(., dat_ex_pd) %>% 
+  dplyr::select(ADM0_ADM1, ADM1_CODE , ADM0_CODE, ADM1_NAME, ADM0_NAME, 
+                matches("^sum\\..+_Production$"), geometry) %>%   
+  group_by(ADM0_CODE,ADM1_CODE) %>% 
+  mutate(treenut_pd = sum(sum.walnut_Production, 
+                          sum.almond_Production,
+                          sum.chestnut_Production,
+                          sum.cashew_Production,
+                          sum.pistachio_Production,
+                          sum.hazelnut_Production,
+                          sum.brazil_Production,
+                          sum.nutnes_Production)) %>%
+  mutate(rest_pd = sum(sum.aniseetc_Production,
+                       sum.chilleetc_Production,
+                       sum.kolanut_Production,
+                       sum.areca_Production,
+                       sum.cinnamon_Production,
+                       sum.ginger_Production,
+                       sum.hop_Production,
+                       sum.mate_Production, 
+                       sum.nutmeg_Production,
+                       sum.pepper_Production,
+                       sum.peppermint_Production,
+                       sum.pyrethrum_Production,
+                       sum.ramie_Production,
+                       sum.rubber_Production,
+                       sum.spicenes_Production,
+                       sum.sugarnes_Production,
+                       sum.vanilla_Production)) %>% 
+  dplyr::select(ADM0_ADM1, ADM1_CODE , ADM0_CODE, ADM1_NAME, ADM0_NAME,treenut_pd, rest_pd)
+
+
+# validation of aggregation 
+# Validate_agg = xcv %>%dplyr::select(tnuts_vec, iso3, name_adm1) %>% 
+# pivot_longer(cols = starts_with("sum."),
+# names_to = "Nuts", 
+# values_to = "value")
+
+# calcluating the harvested area values for treenuts --------------------------
+treenut_ha = lst_ex_ha[[2]] %>% as_tibble() %>% 
+  dplyr::select(ADM0_ADM1, ADM1_CODE , ADM0_CODE, ADM1_NAME, ADM0_NAME, geometry) %>% 
+  bind_cols(., dat_ex_ha) %>% 
+  dplyr::select(ADM0_ADM1, ADM1_CODE , ADM0_CODE, ADM1_NAME, ADM0_NAME, 
+                matches("^sum\\..+_HarvestedAreaHectares$"), geometry) %>%   
+  group_by(ADM0_CODE, ADM1_CODE) %>% 
+  mutate(treenut_ha = sum(sum.walnut_HarvestedAreaHectares, 
+                          sum.almond_HarvestedAreaHectares,
+                          sum.chestnut_HarvestedAreaHectares,
+                          sum.cashew_HarvestedAreaHectares,
+                          sum.pistachio_HarvestedAreaHectares,
+                          sum.hazelnut_HarvestedAreaHectares,
+                          sum.brazil_HarvestedAreaHectares,
+                          sum.nutnes_HarvestedAreaHectares)) %>%
+  mutate(rest_ha = sum(sum.aniseetc_HarvestedAreaHectares,
+                       sum.chilleetc_HarvestedAreaHectares,
+                       sum.kolanut_HarvestedAreaHectares,
+                       sum.areca_HarvestedAreaHectares,
+                       sum.cinnamon_HarvestedAreaHectares,
+                       sum.ginger_HarvestedAreaHectares,
+                       sum.hop_HarvestedAreaHectares,
+                       sum.mate_HarvestedAreaHectares, 
+                       sum.nutmeg_HarvestedAreaHectares,
+                       sum.pepper_HarvestedAreaHectares,
+                       sum.peppermint_HarvestedAreaHectares,
+                       sum.pyrethrum_HarvestedAreaHectares,
+                       sum.ramie_HarvestedAreaHectares,
+                       sum.rubber_HarvestedAreaHectares,
+                       sum.spicenes_HarvestedAreaHectares,
+                       sum.sugarnes_HarvestedAreaHectares,
+                       sum.vanilla_HarvestedAreaHectares)) %>% 
+  dplyr::select(ADM0_ADM1, ADM1_CODE, ADM0_CODE, ADM1_NAME, ADM0_NAME, treenut_ha, rest_ha)
+
+# Writing treenut intermediate files
+st_write(treenut_pd, here::here("Input_data", "Core_data",  "GAUL2015_tnuts_pd.shp"),
+         driver = "ESRI Shapefile")
+st_write(treenut_ha, here::here("Input_data", "Core_data",  "GAUL2015_tnuts_ha.shp"),
+         driver = "ESRI Shapefile")
+
+# write as csv
+write_csv(treenut_ha, here("Input_data", "treenut_ha.csv"))
+write_csv(treenut_pd, here("Input_data", "treenut_pd.csv"))
+
+
+prod.tib.adm1 = treenut_pd %>% as_tibble() %>%   
+  dplyr::select(ADM0_ADM1, ADM1_CODE, ADM0_CODE, ADM1_NAME, ADM0_NAME, treenut_pd, rest_pd) %>%
+  dplyr::group_by(ADM0_CODE,ADM1_CODE) %>% 
+  dplyr::summarise(round(across(.cols = c(treenut_pd, rest_pd), .fns = sum),1))
+
+# Harvested area 
+harv.tib.adm1 = treenut_ha %>%  as_tibble() %>% 
+  dplyr::select(ADM0_ADM1, ADM1_CODE, ADM0_CODE, ADM1_NAME, ADM0_NAME, treenut_ha, rest_ha) %>%
+  dplyr::group_by(ADM0_CODE,ADM1_CODE) %>% 
+  dplyr::summarise(round(across(.cols = c(treenut_ha, rest_ha), .fns = sum),1))  #rounding is kept as in SPAM2010; 1 decimal!
+
+# Join harvested area and calcluate tree nuts ratio  --------------------------
+# Validation: Calulation of tnuts ratio correct! 
+TnutsRatio_ha_pd_GAUL2015 = treenut_pd %>%
+  group_by(ADM0_CODE,ADM1_CODE) %>% 
+  left_join(., treenut_ha, by=c("ADM0_CODE","ADM1_CODE")) %>% 
+  rowwise() %>% 
+  mutate(tnuts_ratio_pd = treenut_pd/(treenut_pd + rest_pd),
+         tnuts_ratio_ha = treenut_ha/(treenut_ha + rest_ha)) %>%
+  mutate_at(vars(tnuts_ratio_ha, tnuts_ratio_pd), ~replace(., is.nan(.), 0)) %>% 
+  # dplyr::rename(ADM0_CODE = ADM0_CODE.x, ADM1_CODE = ADM1_CODE.x) %>%
+  dplyr::select(ADM0_CODE,ADM1_CODE, tnuts_ratio_pd, tnuts_ratio_ha)
+
+# write and read treenuts data ----------------------------------------------
+write_csv(TnutsRatio_ha_pd_GAUL2015, here::here("Input_data", "GAUL2015_ratio_tnuts_ha_pd.csv"))
+TnutsRatio_ha_pd_GGS= read_csv(here::here("Input_data", "GAUL2015_ratio_tnuts_ha_pd.csv"))
+
+# VEGGIES DISAGGREGATION  -------------------------------------------------
+# Loading veggie raster files  ------------------------------------------------
+# # Functions Veggies
+# load_list_raster <- function(path, type) {
+#   pattern <- switch (type,
+#                      "yield" = "_YieldPerHectare\\.tif",
+#                      "area" = "_HarvestedAreaHectares\\.tif",
+#                      "prod" = "_Production\\.tif")
+#   path <- list.files(path = path,
+#                      full.names = TRUE,
+#                      pattern = pattern)
+#   l_raster <- lapply(path, raster::raster)
+#   # crop_names <- stringr::str_extract(basename(path), "^.+(?=_)")
+#   # names(l_raster) <- crop_names
+#   return(l_raster)
+# }
+# # 
+# # Exctracting veggies and rest area and production value per adm1 zones
+# FunZone_sum=function(rast){ # raster and vector
+#   c=zonal.stats(ADM1_SPAM_GGS, rast, stats = c("sum"))
+#   c$name_adm1 <- ADM1_SPAM_GGS$name_adm1  # assigning the identifier of the original df to the extracted df
+#   dat <- left_join(ADM1_SPAM_GGS, c, by = 'name_adm1') # merge the two files together
+# }
+# 
+# FUN_select_values = function(DF_list){
+#   dplyr::select(DF_list, starts_with("sum")) %>% as_tibble() %>%
+#     dplyr::select(-geometry)
+# }
+
+
+
+# Load treenut data -------------------------------------------------------
+# Load harvested area and production raster data
+rast_harv_veg = load_list_raster(path=here::here("Input_data", "Veggies_all"), type = "area")
+rast_prod_veg = load_list_raster(path=here::here("Input_data", "Veggies_all"), type = "prod")
+
+# Zonal statistics Treenuts -----------------------------------------------
+lst_ex_pd_veg = lapply(rast_prod_veg, FunZone_sum)
+lst_ex_ha_veg = lapply(rast_harv_veg, FunZone_sum)
+
+# Select the extraction value columns
+lst_values_pd_veg = lapply(lst_ex_pd_veg, FUN_select_values)
+lst_values_ha_veg = lapply(lst_ex_ha_veg, FUN_select_values)
+
+# Merging all lists together to a df
+dat_ex_pd_veg = do.call("cbind", lst_values_pd_veg) # merging all tech types
+dat_ex_ha_veg = do.call("cbind", lst_values_ha_veg)
+
+# production values tnuts adm1 --------------------------
+veggie_pd = lst_ex_pd_veg[[2]] %>% as_tibble() %>% 
+  dplyr::select(ADM0_ADM1, ADM1_CODE, ADM0_CODE, ADM1_NAME, ADM0_NAME, geometry) %>% 
+  bind_cols(., dat_ex_pd_veg) %>% 
+  dplyr::select(ADM0_ADM1, ADM1_CODE, ADM0_CODE, ADM1_NAME, ADM0_NAME, 
+                matches("^sum\\..+_Production$"), geometry) %>%   
+  group_by(ADM0_CODE, ADM1_CODE) %>% 
+  mutate(dark_green_pd = sum(sum.spinach_Production,   
+                             sum.vetch_Production)) %>%
+  mutate(red_yellow_pd = sum(sum.tomato_Production,
+                             sum.pumpkinetc_Production,
+                             sum.carrot_Production)) %>% 
+  mutate(other_veg_pd = sum(sum.artichoke_Production,
+                            sum.asparagus_Production,
+                            sum.cabbage_Production,
+                            sum.carob_Production,
+                            sum.cauliflower_Production,
+                            sum.chicory_Production,
+                            sum.cucumberetc_Production,
+                            sum.eggplant_Production,
+                            sum.garlic_Production,
+                            sum.greenbean_Production, 
+                            sum.greenbroadbean_Production,
+                            sum.greencorn_Production,
+                            sum.greenonion_Production,
+                            sum.greenpea_Production,
+                            sum.legumenes_Production,
+                            sum.lettuce_Production,
+                            sum.mushroom_Production,
+                            # sum.olive_Production,
+                            sum.onion_Production,
+                            sum.stringbean_Production,
+                            sum.vegetablenes_Production)) %>% 
+  dplyr::select(ADM0_ADM1, ADM1_CODE, ADM0_CODE, ADM1_NAME, ADM0_NAME,
+                dark_green_pd, red_yellow_pd, other_veg_pd)
+
+
+# validation of veggie aggregation  ---------------------------------------
+# vector containing all three veggie class names 
+# veg_dark_vec = c("sum.spinach_Production","sum.vetch_Production")
+# veg_red_vec = c("sum.tomato_Production","sum.pumpkinetc_Production",
+#                 "sum.carrot_Production")
+# veg_other_vec = c("sum.artichoke_Production","sum.asparagus_Production",
+#                   "sum.cabbage_Production","sum.carob_Production",
+#                   "sum.cauliflower_Production","sum.chicory_Production",
+#                   "sum.cucumberetc_Production","sum.eggplant_Production",
+#                   "sum.garlic_Production","sum.greenbean_Production", 
+#                   "sum.greenbroadbean_Production","sum.greencorn_Production",
+#                   "sum.greenonion_Production","sum.greenpea_Production",
+#                   "sum.legumenes_Production","sum.lettuce_Production",
+#                   "sum.mushroom_Production",# sum.olive_Production,
+#                   "sum.onion_Production","sum.stringbean_Production",
+#                   "sum.vegetablenes_Production")
+# 
+# Validate_agg = veggies_pd %>%dplyr::select(tnuts_vec, iso3, name_adm1) %>% 
+#   pivot_longer(cols = starts_with("sum."),
+#                names_to = "veggies", 
+#                values_to = "value")
+
+# calculating the harvested area values for veggies --------------------------
+veggie_ha = lst_ex_ha_veg[[2]] %>% as_tibble() %>% 
+  dplyr::select(ADM0_ADM1, ADM1_CODE, ADM0_CODE, ADM1_NAME, ADM0_NAME, geometry) %>% 
+  bind_cols(., dat_ex_ha_veg) %>% 
+  dplyr::select(ADM0_ADM1, ADM1_CODE, ADM0_CODE, ADM1_NAME, ADM0_NAME, geometry, 
+                matches("^sum\\..+_HarvestedAreaHectares$"), geometry) %>%   
+  group_by(ADM0_CODE, ADM1_CODE) %>% 
+  mutate(dark_green_ha = sum(sum.spinach_HarvestedAreaHectares,   
+                             sum.vetch_HarvestedAreaHectares)) %>%
+  mutate(red_yellow_ha = sum(sum.tomato_HarvestedAreaHectares,
+                             sum.pumpkinetc_HarvestedAreaHectares,
+                             sum.carrot_HarvestedAreaHectares)) %>% 
+  mutate(other_veg_ha = sum(sum.artichoke_HarvestedAreaHectares,
+                            sum.asparagus_HarvestedAreaHectares,
+                            sum.cabbage_HarvestedAreaHectares,
+                            sum.carob_HarvestedAreaHectares,
+                            sum.cauliflower_HarvestedAreaHectares,
+                            sum.chicory_HarvestedAreaHectares,
+                            sum.cucumberetc_HarvestedAreaHectares,
+                            sum.eggplant_HarvestedAreaHectares,
+                            sum.garlic_HarvestedAreaHectares,
+                            sum.greenbean_HarvestedAreaHectares, 
+                            sum.greenbroadbean_HarvestedAreaHectares,
+                            sum.greencorn_HarvestedAreaHectares,
+                            sum.greenonion_HarvestedAreaHectares,
+                            sum.greenpea_HarvestedAreaHectares,
+                            sum.legumenes_HarvestedAreaHectares,
+                            sum.lettuce_HarvestedAreaHectares,
+                            sum.mushroom_HarvestedAreaHectares,
+                            # sum.olive_HarvestedAreaHectares,
+                            sum.onion_HarvestedAreaHectares,
+                            sum.stringbean_HarvestedAreaHectares,
+                            sum.vegetablenes_HarvestedAreaHectares)) %>% 
+  dplyr::select(ADM0_ADM1, ADM1_CODE, ADM0_CODE, ADM1_NAME, ADM0_NAME,
+                dark_green_ha, red_yellow_ha, other_veg_ha)
+
+
+# write and read veggie geo data -------------------------------------------
+st_write(veggie_pd, here::here("Input_data", "Core_data",  "GAUL2015_veggie_pd.shp"),
+         driver = "ESRI Shapefile")
+st_write(veggie_ha, here::here("Input_data", "Core_data",  "GAUL2015_veggie_ha.shp"),
+         driver = "ESRI Shapefile")
+
+# write as csv
+write_csv(veggie_ha, here("Input_data", "veggie_ha.csv"))
+write_csv(veggie_pd, here("Input_data", "veggie_pd.csv"))
+
+# Join harvested area and calcluate veggie ratio  --------------------------
+# Validation: Calulation of veggies ratio correct! adds up to 1. 
+ratio_veg_ha_pd = veggie_pd %>%
+  group_by(ADM0_CODE, ADM1_CODE) %>% 
+  left_join(., veggie_ha, by=c("ADM0_CODE","ADM1_CODE")) %>% 
+  rowwise() %>% 
+  mutate(DarkGreen_ratio_pd = dark_green_pd/(dark_green_pd+red_yellow_pd+other_veg_pd),
+         DarkGreen_ratio_ha= dark_green_ha/(dark_green_ha+red_yellow_ha+other_veg_ha),
+         RedYellow_ratio_pd= red_yellow_pd/(dark_green_pd+red_yellow_pd+other_veg_pd),
+         RedYellow_ratio_ha= red_yellow_ha/(dark_green_ha+red_yellow_ha+other_veg_ha),
+         OtherVeg_ratio_pd= other_veg_pd/(dark_green_pd+red_yellow_pd+other_veg_pd),
+         OtherVeg_ratio_ha= other_veg_ha/(dark_green_ha+red_yellow_ha+other_veg_ha)) %>% 
+  mutate_at(vars(DarkGreen_ratio_pd, DarkGreen_ratio_ha, 
+                 RedYellow_ratio_pd, RedYellow_ratio_ha,
+                 OtherVeg_ratio_pd, OtherVeg_ratio_ha), ~replace(., is.nan(.), 0)) %>% 
+  # rename(ADM0_CODE = ADM0_CODE,
+         # ADM1_CODE = ADM1_CODE) %>% 
+  dplyr::select(ADM1_CODE, ADM0_CODE,, DarkGreen_ratio_pd, DarkGreen_ratio_ha, RedYellow_ratio_pd, 
+                RedYellow_ratio_ha, OtherVeg_ratio_pd, OtherVeg_ratio_ha)
+
+# write and read veggie data ----------------------------------------------
+write_csv(ratio_veg_ha_pd, here::here("Input_data", "GAUL2015_ratio_veg_ha_pd.csv"))
+ratio_veg_ha_pd = read_csv(here::here("Input_data", "GAUL2015_ratio_veg_ha_pd.csv"))
+
+
+# Dissagregating treenuts and Veggies -------------------------------------
+
+# Apply tnut and veggie ratio to SPAM2010 -----------------------------------------------------
+SPAM2010_global_Gaul2015 = SPAM2010_adm1_exdbf  %>%
+  group_by(ADM1_CODE, ADM0_CODE) %>%
+  left_join(., TnutsRatio_ha_pd_GAUL2015, by=c("ADM1_CODE", "ADM0_CODE")) %>%
+  left_join(., ratio_veg_ha_pd, by=c("ADM1_CODE", "ADM0_CODE")) %>% 
+  pivot_wider(names_from = "crop", values_from = "value") %>% 
+  mutate(TNUT = case_when(rec_type == "P" ~ REST*tnuts_ratio_pd, TRUE ~ REST),
+         TNUT = case_when(rec_type == c("H","A") ~ REST*tnuts_ratio_ha, TRUE ~ TNUT), #TRUE ~ TNUT is important as otherwise (~REST) would overrite the TNUT values from line above.
+         REST = case_when(rec_type == "P" ~ REST * (1-tnuts_ratio_pd), TRUE ~ REST),
+         REST = case_when(rec_type == c("H","A") ~ REST * (1-tnuts_ratio_ha), TRUE ~ REST),
+         DVEG = case_when(rec_type == "P" ~ VEGE * DarkGreen_ratio_pd, TRUE ~ 0),
+         DVEG = case_when(rec_type == c("H","A") ~ VEGE * DarkGreen_ratio_ha, TRUE ~ DVEG),
+         OVEG = case_when(rec_type == "P" ~ VEGE*OtherVeg_ratio_pd, TRUE ~ 0),
+         OVEG = case_when(rec_type == c("H","A") ~ VEGE* OtherVeg_ratio_ha, TRUE ~ OVEG),
+         RVEG = case_when(rec_type == "P" ~ VEGE*RedYellow_ratio_pd, TRUE ~ 0),
+         RVEG = case_when(rec_type == c("H","A") ~ VEGE* RedYellow_ratio_ha, TRUE ~ RVEG))%>%
+  relocate(TNUT, .before = REST) %>% #shifting columns
+  relocate(DVEG, .before = REST) %>% 
+  relocate(RVEG, .before = REST) %>% 
+  relocate(OVEG, .before = REST) %>%
+  dplyr::select(-c(tnuts_ratio_pd:tnuts_ratio_ha, DarkGreen_ratio_pd, DarkGreen_ratio_ha, 
+                   RedYellow_ratio_ha,RedYellow_ratio_pd, OtherVeg_ratio_ha, OtherVeg_ratio_pd, VEGE)) %>% #IMPORTANT: Deleting VEGE to not double count it when rbinnding it with 
+  pivot_longer(cols = ACOF:YAMS, values_to = "value") %>% 
+  pivot_wider(names_from = "rec_type", values_from = "value")
+
+# Write importnat files to disc
+write_csv(SPAM2010_global_Gaul2015, here("Input_data", "SPAM2010_global_nuts_veg_ADM1_aul2015ex.csv"))
+
+
+# Subsetting  -------------------------------------------------------------
+EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA", 
+            "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR", "CYP")
+
+# EU_cntr_Cifos = c("Austria","Belgium","Bulgaria","Croatia","Cyprus", "Czechia","Denmark",
+#                   "Estonia","Finland","France","Germany","Greece","Hungary","Ireland",
+#                   "Italy","Latvia","Lithuania","Luxembourg","Malta","Netherlands","Poland",
+#                   "Portugal","Romania","Slovakia","Slovenia","Spain","Sweden",'United Kingdom')
+
+# Mapping files  ----------------------------------------------------------
+# Country
+dat_map_SPAM_GAUL_CIFOS_FAO = read.csv(here("Input_data", "Mappings", "dat_map_SPAM_GAUL_CIFOS_FAO.csv")) %>%  as_tibble()
+SPAM_GAUL_glo_MAP = SPAM2010_global_Gaul2015 %>% left_join(., dat_map_SPAM_GAUL_CIFOS_FAO, by = c("ADM0_CODE"="ADM0_CODE_GAUL"))
+
+# Crops
+
+
+# SPAM crop mapping
+SPAM2010_CropList = read_excel("Input_data/Mappings/SPAM2010_CropList.xlsx", 
+                               sheet = "crop_map_distinct")
+
+# Subset EU28 - ADding yields, renaming, mapping names
+SPAM_GAUL_EU28 = SPAM_GAUL_glo_MAP %>% filter(., iso3_SPAM  %in% EU_iso3) %>% 
+  dplyr::select(iso3_SPAM, ADM1_CODE, ADM0_CODE, ADM0_NAME, ADM1_NAME, name_cntr_CIFOS2021, name, tech_type, H, P, A) %>% 
+    rename(HarvestedArea_ha = H,
+           PhysicalArea_ha = A, 
+           Production_mt = P,
+           crop = name) %>% 
+    mutate(Yield_kgha = round((Production_mt*1000/HarvestedArea_ha),1))%>% #checked for result. We are transforming mt to kg  when multiplying Production by 1000 to get unit kg/ha for yields
+    mutate_at(vars(Yield_kgha), ~replace(., is.nan(.), 0)) %>% 
+    mutate(Production_mt = ifelse(is.infinite(Yield_kgha), 0, Production_mt)) %>% #If Yields show inf values P is changed to 0.
+    mutate(Yield_kgha = ifelse(is.infinite(Yield_kgha),0, Yield_kgha)) %>%   #inf values in Y column are changed to 0's
+    mutate(across(c(HarvestedArea_ha,PhysicalArea_ha,Production_mt,Yield_kgha), round, 1)) %>% 
+    mutate(crop = tolower(crop)) %>% # convert to lower case 
+    left_join(., SPAM2010_CropList, by = c("crop" = "short_spam2010")) %>%
+    rename("country_cifos" = "name_cntr_CIFOS2021",
+           "crops_cifos" = "cifos_crops",
+           "crops_spam" = "crop",
+           "crops_fao" = "name_fao",
+           iso3 = iso3_SPAM) %>% 
+    dplyr::select(iso3, ADM0_CODE, ADM1_CODE, country_cifos, ADM1_NAME, crops_cifos, crops_spam, tech_type, HarvestedArea_ha, PhysicalArea_ha, Production_mt, Yield_kgha) 
+
+write.csv(SPAM_GAUL_EU28, here("Input_data", "Core_data", "SPAM_GAUL2015_EU28.csv"))
diff --git a/SPAM_Data_Transformation/SPAM2010_geotiff_aggregationADM1.R b/SPAM_Data_Transformation/SPAM2010_geotiff_aggregationADM1.R
index d83a52c..21c3190 100644
--- a/SPAM_Data_Transformation/SPAM2010_geotiff_aggregationADM1.R
+++ b/SPAM_Data_Transformation/SPAM2010_geotiff_aggregationADM1.R
@@ -1255,7 +1255,7 @@ VeggieFun_area = function(spam){
     dplyr::select(-VEGE)
 }
 
-Fun_wide = function(dat){
+Fun_swide = function(dat){
   dat %>%pivot_wider(names_from = crop, values_from = valu)
 }
 
-- 
GitLab