diff --git a/Raster extraction/Spatial_SPAM.R b/Raster extraction/Spatial_SPAM.R
new file mode 100644
index 0000000000000000000000000000000000000000..4c49067185c55e613b9fb2cd27defb9da4dfed9c
--- /dev/null
+++ b/Raster extraction/Spatial_SPAM.R	
@@ -0,0 +1,218 @@
+easypackages::packages(c("tidyverse", 'sf', 'raster','sp','dplyr','spatialEco',
+                         'exactextractr','tibble', 'here', "readr", "margrittr","tydr",
+                         "readxl", "foreign", "data.table", "spData","spDataLarge", "rgdal"))
+
+# remotes::install_github("Nowosad/spDataLarge")
+# seting project directory
+here('Input_data')
+# Raster to point extraction  ---------------------------------------------
+# Making SPAM spatial explicit 
+dat_spam=read_csv("spam2010V2r0_global_Y_TA.csv")
+dat_spam=dplyr::select(dat_spam, x,y, alloc_key, name_adm1, iso3)
+setwd
+# Make SPAM spatial 
+# projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
+# proj_mercator = "+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
+proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below 
+proj_meter = "+datum=WGS84 +units=m +no_defs"
+dat_cent <- st_as_sf(x = dat_spam,                         
+                     coords = c("x", "y"),
+                     crs = proj)
+
+
+
+# dat_long = st_as_sf(coords = c("lon", "lat"))
+
+# dat_cent = filter(dat_cent, iso3=="FIN")
+# dat_cent=dat_cent[1:10,]
+
+
+# import raster datasets 
+dat_rast = raster("D:/Raw_dat/Raw_dat/Phosphorus/nitrogen-fertilizer-global-geotif/NFertilizer_global_geotif/nfertilizer_global.tif")
+dat_rast = raster("D:/Raw_dat/Raw_dat/Phosphorus/nitrogen-fertilizer-global-geotif/NFertilizer_global_geotif/nfertilizer_global.tif")
+dat_rast = raster("D:/Raw_dat/Raw_dat/Phosphorus/nitrogen-fertilizer-global-geotif/NFertilizer_global_geotif/nfertilizer_global.tif")
+dat_rast_SOC = raster(here("Input_data", "sol_organic.carbon_usda.tif"))
+
+
+dat_rast_peat = raster(here("Input_data", "PeatlandMap_A3_300dpi_GeoTIF_final.tif"))
+length(dat_rast_peat)
+is.na(dat_rast_peat)
+
+dat_rast_ISCRIC = raster("D:/Raw_dat/Raw_dat/Soil maps/wise_05min_v12/wise_05min_v12/Grid/smw5by5min/w001001.adf")
+dat_clim=getData('worldclim', var='bio', res=5) 
+dat_rast_meantemp = dat_clim[[1]]
+
+# Set the coordinate reference system (CRS) (i.e., define the projection).
+projection(dat_rast_SOC) <- "+proj=longlat +datum=WGS84 +no_defs +units=m"
+
+plot(clim[[1:4]])
+# Import Polygons
+
+# Point extraction of raster values and add it to the SPAM point --------
+funrast = function(r){                  #insert the raster dataset from which you would like to extract values 
+  r = raster::extract(r,                # raster layer
+                      dat_cent,         #  SF with centroids for buffer
+                      buffer = 5000,    # buffer size, units depend on CRS. Here in meters.10x10km(~5km radius)
+                      fun=mean,         # what value to extract
+                      df=TRUE)          # creating a df TRUE
+  r$ID = dat_cent$alloc_key # assigning the identifier of the original df to the extracted df
+  dat_spam <- merge(dat_spam, r, by.x = 'alloc_key', by.y = 'ID')# merge the two files together 
+  }
+
+system.time(
+  funrast(dat_rast)
+  )
+
+# Extraction
+ex_Nfert_spam = funrast(dat_rast)
+ex_soil_spam = funrast(dat_rast_ISCRIC)
+ex_SOC_spam = funrast(dat_rast_SOC)
+# start SOC extraction at 7.30, 19.th
+
+filter(test, nfertilizer_global>50)
+summary(test)
+
+
+# Zonal statistics  -------------------------------------------------------
+# Load polygons
+wd=setwd("C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Raw_dat/Zonation/GADM/gadm36_shp")
+
+# Load and check shapefile 
+shape <- read_sf(dsn = wd, layer = "gadm36") # multipolygons containing all admin levels globally
+# class(shape)
+# shape$GID
+shape=select(shape, 'UID', 'geometry', 'GID_0')
+# y=filter(x, GID_0 == 'ALG') 
+
+# Load raster 
+rast=dat_rast = raster("D:/Raw_dat/Raw_dat/Phosphorus/nitrogen-fertilizer-global-geotif/NFertilizer_global_geotif/nfertilizer_global.tif")
+
+
+# Perform zonal statistics ------------------------------------------------
+FunZone=function(rast, shape){ # raster and vector
+  c=zonal.stats(shape, d, stats = c("mean")) 
+  c$UID <- shape$UID  # assigning the identifier of the original df to the extracted df
+  dat <- merge(shape, c, by.x = 'UID', by.y = 'UID') # merge the two files togethere
+}
+
+dat$mean.nfertilizer_global
+summary(dat$mean.nfertilizer_global)
+
+
+
+c=zonal.stats(y, dat_rast, stats = c("mean")) 
+c$UID <- x$UID  # assigning the identifier of the original df to the extracted df
+dat <- merge(shape, c, by.x = 'UID', by.y = 'UID') # merge the two files together 
+
+# Point = overlay(rast, df) #does not work
+
+BufferFun <- function(rast,ds) {
+  raster::extract(rast,            # raster layer
+                  ds,              #  SF with centroids for buffer
+                  buffer = 5000,   # buffer size, units depend on CRS. Here in meters.10x10km(~5km radius)
+                  fun=mean,        # what value to extract
+                  df=TRUE)
+  # ,         # creating a df TRUE
+                  # na.rm=TRUE)      # if is.na is true
+}
+
+
+cent_mean_FIN$ID = ds$alloc_key # assigning the identifier of the original df to the extracted df
+centroids <- merge(dat, cent_mean_FIN, by.x = 'alloc_key', by.y = 'ID') # merge the two files together 
+
+# Joining GADM with SPAM -------------------------------------------------------
+# Load and check shapefile 
+shape=read_sf(dsn = here("Input_data"), layer = "gadm36") # multipolygons containing all admin levels globally
+shape=dplyr::select(shape, 'UID', 'geometry', 'GID_0', 'GID_1')
+# shape_fil=filter(shape, GID_0 == 'FIN')
+
+# Making SPAM spatial explicit 
+dat_spam=read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv"))
+dat_spam=dplyr::select(dat_spam, 'x','y', 'alloc_key')
+
+# Make SPAM spatial 
+projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
+dat_cent <- st_as_sf(x = dat_spam,                         
+                     coords = c("x", "y"),
+                     crs = projcrs)
+
+class(dat_cent)
+# dat_cent = filter(df, iso3=="FIN")
+
+
+SPAM_GADM_JOIN_GIS_left = st_join(dat_cent, shape, join = st_within, left = T) #https://postgis.net/docs/ST_Within.html 
+# head(SPAM_GADM_JOIN_GIS)
+# class(SPAM_GADM_Join)
+
+st_write(obj = SPAM_GADM_JOIN_GIS, dsn = here("Input_data"), driver = "ESRI Shapefile")
+st_write(obj = SPAM_GADM_JOIN_GIS_left, dsn = here("Input_data"), driver = "ESRI Shapefile")
+
+
+# Joining SPAM and GAUL  --------------------------------------------------
+EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA", 
+            "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR")
+
+EU_longGAUL = c("Austria", "Belgium","Finland", "Bulgaria", "Croatia", "Czech Republic", "Denmark", "Spain", "France",
+                "Germany", "Greece", "Ireland", " Italy", " Latvia", " Lithuania", "Luxembourg","Malta", "Netherlands", 
+                "Poland", "Portugal", "Rumania", "Slovakia", "Slovenia", "Estonia", "Sweden", "U.K. of Great Britain and Northern Ireland")
+
+# dat_gaul_asap=read_sf(here("Input_data", "gaul1_asap.shp")) #modified GAUL map (less adm1 detail)
+dat_gaul=read_sf(here("Input_data", "g2008_1.shp")) #actual GAUL map 
+dat_spam=read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv"))
+dat_spam_EU = dat_spam %>% 
+  dplyr::select(alloc_key, name_adm1, name_cntr, x,y,iso3) %>% 
+  filter(iso3 %in% EU_iso3)
+
+# Make the spam dataset spatial
+projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
+dat_spam_EU <- st_as_sf(x = dat_spam,              
+                     coords = c("x", "y"),
+                     crs = projcrs)
+
+dat_gaul = filter(dat_gaul, ADM0_NAME  %in% EU_longGAUL)
+
+# Joining SPAM and GAUL for EU27
+SPAM_GAUL_JOIN_GIS = st_join(dat_spam_EU, dat_gaul, join = st_within, left = T) #https://postgis.net/docs/ST_Within.html 
+SPAM_GAUL_JOIN_GIS_intersect = st_join(dat_spam_EU, dat_gaul, join = st_intersects, left = T) 
+
+# Joining the geometries of GAUL and SPAM
+dat_gaul_fil = dat_gaul %>% dplyr::select(name1, geometry) %>% 
+  slice_head(n=3)
+geometry_join = left_join(SPAM_GAUL_JOIN_GIS, dat_gaul_fil, by = "name1")
+
+# ZOnal statistics > Calculating mean of SOC for EU27 
+SPAM_GAUL_JOIN_GIS_fil = SPAM_GAUL_JOIN_GIS %>% 
+  dplyr::select(alloc_key, geometry)
+  top_n(n=20)
+  
+c=zonal.stats(dat_gaul_fil, dat_rast_SOC, stats = c("mean")) 
+
+
+# Extracting SOC on pixel level -------------------------------------------
+
+dat_spam_EU_fil = dat_spam_EU %>% 
+  top_n(n=50)
+proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below 
+proj_meter = "+datum=WGS84 +units=m +no_defs"
+dat_cent <- st_as_sf(x = dat_spam_EU_fil,                         
+                     coords = c("x", "y"),
+                     crs = proj)
+#testing the NA remove
+SOC_rast_wNA = BufferFun(SOC_rast,dat_cent) 
+SOC_rast_NoNa = BufferFun(dat_rast_SOC,dat_cent) #5000 m buffer, witout nA
+
+
+SOC_dat_spam_EU = cbind(SOC_rast,dat_spam_EU)
+write_csv(SOC_dat_spam_EU, here("Input_data", "SOC_pixel_EU.csv")) 
+
+# Calculating percentages and Peat index
+SOC_EU_pix_perc = SOC_dat_spam_EU %>% 
+  mutate(percentageSOC = sol_organic.carbon_usda/2) %>% 
+  mutate(peat_index = if_else(percentageSOC > 30, "Peat", "Non-peat")) %>% 
+  write_csv(here("Input_data", "SOCperc_pixel_EU_Peat.csv")) 
+
+# Then aggregate to adm1 level and bind.
+
+
+cent_mean_FIN$ID = ds$alloc_key # assigning the identifier of the original df to the extracted df
+centroids <- merge(dat, cent_mean_FIN, by.x = 'alloc_key', by.y = 'ID') # merge the two files together 
diff --git a/Raster extraction/Spatial_SPAM2.R b/Raster extraction/Spatial_SPAM2.R
new file mode 100644
index 0000000000000000000000000000000000000000..5f683f185cad25fb8465b5ea096c94387b00659c
--- /dev/null
+++ b/Raster extraction/Spatial_SPAM2.R	
@@ -0,0 +1,327 @@
+easypackages::packages(c("tidyverse", 'sf', 'raster','sp','dplyr','spatialEco',
+                         'exactextractr','tibble', 'here', "readr", "margrittr","tydr",
+                         "readxl", "foreign", "data.table", "spData","spDataLarge", "purrr", "rgdal"
+                         ))
+
+remotes::install_github("Nowosad/spDataLarge")
+setwd(here("Input_data"))
+
+# Raster to point extraction  ---------------------------------------------
+# Making SPAM spatial explicit 
+dat_spam=read_csv("C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Raw_dat/Zonation/SPAM/2010/spam2010v2r0_global_yield.csv/spam2010V2r0_global_Y_TA.csv")
+dat_spam=dplyr::select(dat_spam, x,y, alloc_key, name_adm1, iso3)
+
+# Make SPAM spatial 
+# projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
+# proj_mercator = "+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
+proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below 
+proj_meter = "+datum=WGS84 +units=m +no_defs"
+dat_cent <- st_as_sf(x = dat_spam,                         
+                     coords = c("x", "y"),
+                     crs = proj)
+
+
+
+# dat_long = st_as_sf(coords = c("lon", "lat"))
+
+# dat_cent = filter(dat_cent, iso3=="FIN")
+# dat_cent=dat_cent[1:10,]
+
+
+# import raster datasets 
+dat_rast = raster("D:/Raw_dat/Raw_dat/Phosphorus/nitrogen-fertilizer-global-geotif/NFertilizer_global_geotif/nfertilizer_global.tif")
+dat_rast = raster("D:/Raw_dat/Raw_dat/Phosphorus/nitrogen-fertilizer-global-geotif/NFertilizer_global_geotif/nfertilizer_global.tif")
+dat_rast = raster("D:/Raw_dat/Raw_dat/Phosphorus/nitrogen-fertilizer-global-geotif/NFertilizer_global_geotif/nfertilizer_global.tif")
+dat_rast_SOC = raster(here("Input_data", "sol_organic.carbon_usda.tif"))
+dat_rast_peat = raster(here("Input_data", "PeatlandMap_A3_300dpi_GeoTIF_final.tif"))
+dat_rast_ISCRIC = raster("D:/Raw_dat/Raw_dat/Soil maps/wise_05min_v12/wise_05min_v12/Grid/smw5by5min/w001001.adf")
+dat_clim=getData('worldclim', var='bio', res=5) 
+dat_rast_meantemp = dat_clim[[1]]
+
+# Set the coordinate reference system (CRS) (i.e., define the projection).
+projection(dat_rast_SOC) <- "+proj=longlat +datum=WGS84 +no_defs +units=m"
+
+plot(clim[[1:4]])
+# Import Polygons
+
+# Point extraction of raster values and add it to the SPAM point --------
+funrast = function(r){                  #insert the raster dataset from which you would like to extract values 
+  r = raster::extract(r,                # raster layer
+                      dat_cent,         #  SF with centroids for buffer
+                      buffer = 5000,    # buffer size, units depend on CRS. Here in meters.10x10km(~5km radius)
+                      fun=mean,         # what value to extract
+                      df=TRUE)          # creating a df TRUE
+  r$ID = dat_cent$alloc_key # assigning the identifier of the original df to the extracted df
+  dat_spam <- merge(dat_spam, r, by.x = 'alloc_key', by.y = 'ID')# merge the two files together 
+}
+
+
+
+# Extraction
+ex_Nfert_spam = funrast(dat_rast)
+ex_soil_spam = funrast(dat_rast_ISCRIC)
+ex_SOC_spam = funrast(dat_rast_SOC)
+# start SOC extraction at 7.30, 19.th
+
+filter(test, nfertilizer_global>50)
+summary(test)
+
+# Zonal statistics  -------------------------------------------------------
+# Load polygons
+wd=setwd("C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Raw_dat/Zonation/GADM/gadm36_shp")
+
+# Load and check shapefile 
+shape <- read_sf(dsn = here("Input_data"), layer = "gadm36") # multipolygons containing all admin levels globally
+
+# class(shape)
+# shape$GID
+shape=dplyr::select(shape, 'UID', 'geometry', 'GID_0', "GID_1")
+# y=filter(x, GID_0 == 'ALG') 
+
+shape_gaul=read_sf(here("Input_data", "g2008_1.shp")) #actual GAUL map 
+
+
+# Load raster 
+rast=dat_rast = raster("D:/Raw_dat/Raw_dat/Phosphorus/nitrogen-fertilizer-global-geotif/NFertilizer_global_geotif/nfertilizer_global.tif")
+
+
+# Perform zonal statistics ------------------------------------------------
+FunZone=function(rast, shape){ # raster and vector
+  c=zonal.stats(shape, d, stats = c("mean")) 
+  c$UID <- shape$UID  # assigning the identifier of the original df to the extracted df
+  dat <- merge(shape, c, by.x = 'UID', by.y = 'UID') # merge the two files togethere
+}
+
+dat$mean.nfertilizer_global
+summary(dat$mean.nfertilizer_global)
+
+
+
+c=zonal.stats(y, dat_rast, stats = c("mean")) 
+c$UID <- x$UID  # assigning the identifier of the original df to the extracted df
+dat <- merge(shape, c, by.x = 'UID', by.y = 'UID') # merge the two files together 
+
+# Point = overlay(rast, df) #does not work
+
+BufferFun <- function(rast,ds) {
+  raster::extract(rast,            # raster layer
+                  ds,              #  SF with centroids for buffer
+                  buffer = 5000,   # buffer size, units depend on CRS. Here in meters.10x10km(~5km radius)
+                  fun=mean,        # what value to extract
+                  df=TRUE)
+  # ,         # creating a df TRUE
+  # na.rm=TRUE)      # if is.na is true
+}
+
+
+cent_mean_FIN$ID = ds$alloc_key # assigning the identifier of the original df to the extracted df
+centroids <- merge(dat, cent_mean_FIN, by.x = 'alloc_key', by.y = 'ID') # merge the two files together 
+
+
+# Clip raster  ------------------------------------------------------------
+# https://www.earthdatascience.org/courses/earth-analytics/lidar-raster-data-r/crop-raster-data-in-r/
+crop_extent <- read_sf(here("Input_data", "shape_gaul_fin_adm1.shp")) 
+
+# Plot raster 
+plot(dat_rast_SOC_perc) #validated as hald of dat_rast_SOC below
+# plot(dat_rast_SOC)
+
+# Plot shapefile
+plot(crop_extent %>%dplyr::select(ADM1_CODE),
+     main = "Shapefile imported into R - crop extent",
+     axes = T,
+     border = "blue")
+
+# crop the lidar raster using the vector extent
+SOC_test_perc <- crop(dat_rast_SOC_perc, crop_extent)
+plot(crop, main = "Cropped soc perc")
+
+# add shapefile on top of the existing raster
+plot(crop_extent, add = TRUE)
+
+
+
+
+# Joining GADM with SPAM -------------------------------------------------------
+# Load and check shapefile 
+shape=read_sf(dsn = here("Input_data"), layer = "gadm36") # multipolygons containing all admin levels globally
+shape=dplyr::select(shape, 'UID', 'geometry', 'GID_0', 'GID_1')
+# shape_fil=filter(shape, GID_0 == 'FIN')
+
+# Making SPAM spatial explicit 
+dat_spam=read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv"))
+dat_spam=dplyr::select(dat_spam, 'x','y', 'alloc_key')
+
+# Make SPAM spatial 
+projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
+dat_spam <- st_as_sf(x = dat_spam,                         
+                     coords = c("x", "y"),
+                     crs = projcrs)
+
+class(dat_cent)
+# dat_cent = filter(df, iso3=="FIN")
+
+
+SPAM_GADM_JOIN_GIS_left = st_join(dat_spam, shape, join = st_within, left = T) #https://postgis.net/docs/ST_Within.html 
+# head(SPAM_GADM_JOIN_GIS)
+# class(SPAM_GADM_Join)
+
+st_write(obj = SPAM_GADM_JOIN_GIS, dsn = here("Input_data"), driver = "ESRI Shapefile")
+st_write(obj = SPAM_GADM_JOIN_GIS_left, dsn = here("Input_data"), driver = "ESRI Shapefile")
+
+
+# Joining SPAM and GAUL and GADM EU --------------------------------------------------
+EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA", 
+            "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR")
+
+EU_longGAUL = c("Austria", "Belgium","Finland", "Bulgaria", "Croatia", "Czech Republic", "Denmark", "Spain", "France",
+                "Germany", "Greece", "Ireland", " Italy", " Latvia", " Lithuania", "Luxembourg","Malta", "Netherlands", 
+                "Poland", "Portugal", "Rumania", "Slovakia", "Slovenia", "Estonia", "Sweden", "U.K. of Great Britain and Northern Ireland")
+
+dat_gaul=read_sf(here("Input_data", "gaul1_asap.shp"))
+
+
+dat_spam=read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv"))
+dat_spam_EU = dat_spam %>% 
+  dplyr::select(alloc_key, name_adm1, name_cntr, x,y,iso3) %>% 
+  filter(iso3 %in% EU_iso3)
+
+# Make the spam dataset spatial
+projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
+dat_spam_EU <- st_as_sf(x = dat_spam_EU,              
+                        coords = c("x", "y"),
+                        crs = projcrs)
+
+dat_gaul=read_sf(here("Input_data", "gaul1_asap.shp"))
+dat_gaul <- st_as_sf(x = dat_gaul,              
+                        coords = c("x", "y"),
+                        crs = projcrs)
+
+# Source: http://worldmap.harvard.edu/data/geonode:g2008_1
+dat_gaul_adm1=read_sf(here("Input_data", "g2008_1.shp"))
+dat_gaul_adm1 <- st_as_sf(x = dat_gaul_adm1,              
+                     coords = c("x", "y"),
+                     crs = projcrs)
+
+GADM <- read_sf(dsn = here("Input_data"), layer = "gadm36") # multipolygons containing all admin levels globally
+dat_gaul <- st_as_sf(x = GADM,              
+                     coords = c("x", "y"),
+                     crs = projcrs)
+
+# dat_gaul_EU = dat_gaul %>% filter(name0 %in% EU_longGAUL)
+
+# # Joining SPAM and GAUL for EU27
+# # SPAM_GAUL_JOIN_GIS = st_join(
+# # ZOnal statistics > Calculating mean of SOC for EU27 
+# SPAM_GAUL_JOIN_GIS_fil = SPAM_GAUL_JOIN_GIS %>% 
+#   dplyr::select(alloc_key, geometry)
+# top_n(n=20)
+
+# c=zonal.stats(dat_gaul_fil, dat_rast_SOC, stats = c("mean")) 
+# (dat_spam_EU, dat_gaul, join = st_within, left = T) #https://postgis.net/docs/ST_Within.html 
+SPAM_GAUL_JOIN_GIS_intersect = st_join(dat_spam_EU, dat_gaul_adm1, join = st_intersects, left = T) #, left = T) 
+SPAM_GAUL_GADM = st_join(SPAM_GAUL_JOIN_GIS_intersect, GADM, join = st_intersects, left = T) 
+
+SPAM_GAUL_GADM_select = SPAM_GAUL_GADM %>% 
+  dplyr::select(iso3, name_cntr, name_adm1, geometry,  #SPAM
+                name1, name1_shr, name0,#                 GAUL 
+                GID_1, GID_0, NAME_1, VARNAME_1) %>% #                           GADM
+  write_sf(here("Input_data", "SPAM_GAUL_SF_join_intersect.shp"), driver = "ESRI Shapefile")
+  
+# Adding the GADM polygon geometries to the SPAM_GAUL_GADM dataset 
+GADM_df = as_tibble(GADM)
+SPAM_GADM_GAUL_df = as_tibble(SPAM_GAUL_GADM_select) %>% 
+  dplyr::select(geometry, GID_1) %>% 
+  left_join(., GADM_df, by = "GID_1")
+
+# Extracting PEAT /non-PEAT -------------------------------------------
+# Reclassify peat = 1, non=peat = 0; Peat is defined as SOC => 30% organic matter 
+
+
+# create classification matrix
+dat_rast_SOC_perc <- dat_rast_SOC/2 
+
+reclass_df <- c(0, 30, 0,
+                30, Inf, 1)
+writeRaster(dat_rast_SOC_perc, here("Input_data", "dat_SOC_USDA_percent.tiff"))
+
+# reshape the object into a matrix with columns and rows
+reclass_m <- matrix(reclass_df,
+                    ncol = 3,
+                    byrow = TRUE)
+# reclassify the raster using the reclass object - reclass_md
+SOC_reclas <- reclassify(dat_rst_SOC_perc,
+                         reclass_m)
+
+writeRaster(SOC_reclas, here("Input_data", "SOC_reclass_perc.tif"))
+SOC_reclass = raster(here("Input_data", "SOC_reclass_perc.tif"))
+cellstats(SOC_reaclass)
+??cellstats
+cellStats(SOC_reclass, stat = max)
+
+shape_gaul=read_sf(here("Input_data", "g2008_1.shp")) #actual GAUL map 
+shape_gaul=dplyr::select(shape_gaul, 'G2008_1_ID',  'ADM1_CODE', "ADM0_CODE", 'ADM0_NAME', 'ADM1_NAME', "CONTINENT", "REGION", "geometry")
+
+shape2 = shape_gaul %>% slice(1:1000)
+
+extract()
+?extract
+EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA", 
+            "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR")
+# write.csv(EU_iso3, here("Input_data", "EU27_iso3_lst.csv"))
+length(EU_iso3) #27 member states > the list comes from gams
+
+# Check if the EU countries iso3 are all present in dat.phys etc.
+# EU_iso3 %in% dat.phys$iso3 #Validation if all EU27 countries are in the dfs below
+shapeGL = shape_gaul %>%
+  filter(ADM1_CODE == 1244) %>% 
+  write_sf(here("Input_data", "shape_gaul_fin_adm1.shp"))
+  
+
+
+freqs = exact_extract(SOC_reclas, shapeGL, function(value, coverage_fraction)   {
+    sum(coverage_fraction[value == 1], na.rm = T) / sum(coverage_fraction, na.rm = T)})
+
+BufferFun <- function(rast,ds) {
+  exact_extract(rast,            # raster layer
+                as(ds, "sf"),              #  SF with centroids for buffer
+                buffer = 5000)
+  #,   # buffer size, units depend on CRS. Here in meters.10x10km(~5km radius)
+  # fun=mean,        # what value to extract
+  # df=TRUE)
+  # ,         # creating a df TRUE
+  # na.rm=TRUE)      # if is.na is true
+}
+
+?exact_ectract
+
+min(SOC_reclas)
+cellStats(SOC_reclas, count)
+
+
+dat_spam_EU_fil = dat_spam_EU %>% 
+  top_n(n=50)
+proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below 
+proj_meter = "+datum=WGS84 +units=m +no_defs"
+dat_cent <- st_as_sf(x = dat_spam_EU_fil,                         
+                     coords = c("x", "y"),
+                     crs = proj)
+#testing the NA remove
+SOC_rast_wNA = BufferFun(SOC_rast,dat_cent) 
+SOC_rast_NoNa = BufferFun(dat_rast_SOC,dat_cent) #5000 m buffer, witout nA
+
+
+SOC_dat_spam_EU = cbind(SOC_rast,dat_spam_EU)
+write_csv(SOC_dat_spam_EU, here("Input_data", "SOC_pixel_EU.csv")) 
+
+# Calculating percentages and Peat index
+SOC_EU_pix_perc = SOC_dat_spam_EU %>% 
+  mutate(percentageSOC = sol_organic.carbon_usda/2) %>% 
+  mutate(peat_index = if_else(percentageSOC > 30, "Peat", "Non-peat")) %>% 
+  write_csv(here("Input_data", "SOCperc_pixel_EU_Peat.csv")) 
+
+# Then aggregate to adm1 level and bind.
+
+
+cent_mean_FIN$ID = ds$alloc_key # assigning the identifier of the original df to the extracted df
+centroids <- merge(dat, cent_mean_FIN, by.x = 'alloc_key', by.y = 'ID') # merge the two files together 
diff --git a/Raster extraction/peat.R b/Raster extraction/peat.R
new file mode 100644
index 0000000000000000000000000000000000000000..fb4ed106c55ffbe2ef127e5e5a988a9ba0ae81b4
--- /dev/null
+++ b/Raster extraction/peat.R	
@@ -0,0 +1,363 @@
+easypackages::packages(c("tidyverse", 'sf', 'raster','sp','dplyr','spatialEco',
+                         'exactextractr','tibble', 'here', "readr", "margrittr","tydr",
+                         "readxl", "foreign", "data.table", "spData","spDataLarge", "purrr", "rgdal", "lwgeom"
+))
+
+# Loading vector data 
+dat_spam=read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv"))
+dat_spam=dplyr::select(dat_spam, x,y, alloc_key, name_adm1, iso3)
+
+# Make SPAM spatial 
+# projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
+# proj_mercator = "+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
+proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below 
+proj_meter = "+datum=WGS84 +units=m +no_defs"
+dat_cent <- st_as_sf(x = dat_spam,                         
+                     coords = c("x", "y"),
+                     crs = proj)
+
+
+shape_gaul=read_sf(here("Input_data", "g2008_1.shp")) #actual GAUL map 
+shape_gaul=dplyr::select(shape_gaul, 'G2008_1_ID',  'ADM1_CODE', "ADM0_CODE", 'ADM0_NAME', 'ADM1_NAME', "CONTINENT", "REGION", "geometry")
+
+# country_mapping
+cntr_map_global = read_sf(here("Input_data", "GAUL2008_SPAM_LINK", "GAUL2008_SPAM_LINK.shp")) #checked the map. IS te right GAUL map 
+
+# SUbsetting EU-27
+EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA", 
+            "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR")
+
+cntr_EU2_gaul_shape = cntr_map_global %>% #Pixel level data
+  filter(iso3 %in% EU_iso3) %>% 
+  dplyr::select(alloc_key,G2008_1_ID,iso3,name_cntr,name_adm1,ADM1_CODE, ADM0_CODE)  
+# distinct_at(vars(G2008_1_ID, iso3)) %>% 
+# st_join(., shape_gaul, join = st_intersects, right = T)
+
+# With this shape file we can clip the extent of EU27 area
+gaul_EU27 = st_join(shape_gaul, cntr_EU2_gaul_shape, join = st_intersects, left = T) %>% 
+  drop_na() %>% 
+  distinct_at(vars(G2008_1_ID.x, alloc_key, ADM1_NAME, ADM0_NAME, geometry)) %>% 
+  rename(
+    # ADM1_NAME =ADM1_NAME.x,
+    # ADM0_NAME =ADM0_NAME.x,
+    G2008_1_ID =G2008_1_ID.x) %>% 
+  st_as_sf()
+
+# write_sf(gaul_EU27, here("Input_data", "gaul_EU27.shp"))
+read_sf(here("Input_data", "gaul_EU27.shp"))
+
+# tHIS IS FOR Futther clipping
+gaul_uniq_EU27 = gaul_EU27 %>% 
+  as_tibble() %>% 
+  dplyr::select(-alloc_key) %>% 
+  distinct_all() %>% 
+  st_as_sf #%>% 
+  # write_sf(here("Input_data", "gaul_uniq_EU27.shp"))
+
+gaul_unique_EU27 = read_sf((here("Input_data", "gaul_uniq_EU27.shp")))
+
+# verifying the clipping. Issue: looks skewed in the norther countries.. 
+plot(gaul_uniq_EU27 %>% dplyr::select(ADM1_NAME))
+
+# # Check if 27 EU memeberstates > CORRECT!
+# lengthEU27 = gaul_EU27 %>% 
+#   as_tibble() %>% 
+#   distinct_at(vars(ADM0_NAME)) 
+
+# Loading raster data 
+dat_rast_SOC = raster(here("Input_data", "sol_organic.carbon_usda.tif"))
+
+
+# dat_rast_SOC_perc <- dat_rast_SOC/2 #conversion to percentage according to openlandmap website
+# writeRaster(dat_rast_SOC_perc, here("Input_data", "dat_SOC_USDA_percent.tiff")) #validated 
+dat_rast_SOC_perc=raster(here("Input_data", "dat_SOC_USDA_percent.tif"))
+# crs(dat_rast_SOC_perc)
+
+# Reclassify raster  ------------------------------------------------------
+# https://www.earthdatascience.org/courses/earth-analytics/lidar-raster-data-r/classify-raster/
+
+# # Creating threshold for reclassification 
+# reclass_df <- c(0, 30, 0,
+#                 30, Inf, 1)
+# 
+# # reshape the object into a matrix with columns and rows
+# reclass_m <- matrix(reclass_df,
+#                     ncol = 3,
+#                     byrow = TRUE)
+# # reclassify the raster using the reclass object - reclass_md
+# SOC_reclas <- reclassify(dat_rst_SOC_perc,
+#                          reclass_m)
+# 
+# writeRaster(SOC_reclas, here("Input_data", "SOC_reclass_perc.tif"))
+# SOC_reclass = raster(here("Input_data", "SOC_reclass_perc.tif"))
+# 
+
+# Clip raster  ------------------------------------------------------------
+# https://www.earthdatascience.org/courses/earth-analytics/lidar-raster-data-r/crop-raster-data-in-r/
+crop_extent <- read_sf(here("Input_data", "gaul_uniq_EU27.shp"))
+
+# Plot raster 
+# plot(dat_rast_SOC_perc) #validated as hald of dat_rast_SOC below
+# # plot(dat_rast_SOC)
+# 
+# # Plot shapefile
+# plot(crop_extent %>%dplyr::select(ADM1_CODE),
+#      main = "Shapefile imported into R - crop extent",
+#      axes = T,
+#      border = "blue")
+# 
+# # crop the lidar raster using the vector extent
+# SOC_test_perc <- crop(dat_rast_SOC_perc, crop_extent)
+# plot(crop, main = "Cropped soc perc")
+# 
+# # add shapefile on top of the existing raster
+# plot(crop_extent, add = TRUE)
+# 
+# plot(gaul_uniq_EU27)
+# # EU raster CLIP
+EU27_area_clip <- crop(dat_rast_SOC_perc, gaul_uniq_EU27)
+# plot(EU27_area_clip)
+# plot(dat_rast_SOC_perc)
+writeRaster(EU27_area_clip, here("Input_data", "EU27Areaclip.tif"))
+EU27_area_clip = raster(here("Input_data", "EU27Areaclip.tif"))
+# Note: takes only a few seconds for EU
+
+## Raster to point polygon (https://geocompr.robinlovelace.net/geometric-operations.html#spatial-vectorization)
+Sys.time()
+SOC_perc_EU27_points = rasterToPoints(EU27_area_clip, spatial = TRUE)
+Sys.time()
+
+save(SOC_perc_EU27_points,file=here("Input_data", "SOC_points_perc_EU.RData")) 
+
+gc(reset=T)
+
+Sys.time()
+st_as_sf(SOC_perc_EU27_points)
+Sys.time()
+
+
+
+# # Alternatively to convert raster to points
+# SOC_perc_EU27_points_sf = st_as_sf(EU27_area_clip, as_points = TRUE, merge = FALSE)
+# ?st_as_sf
+# 
+# 
+# write_sf(SOC_perc_EU27_points, here("Input_data", "SOC_perc_EU27_points.shp")) # memory exhaust
+# Sys.time()
+# # max(SOC_points)
+# > summary(SOC_points$sol_organic.carbon_usda)
+# Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
+# 0.000   4.000   5.000   5.877   7.000  60.000 
+
+
+# HWSD map ----------------------------------------------------------------
+# Here the  polygon is imported - this shape file was created in Qgis 
+SOC_perc_EU27Q = st_read(here("Input_data","point_soc_perc_EU.shp"))
+
+gaul_unique_EU27 = read_sf((here("Input_data", "gaul_uniq_EU27.shp")))
+SOC_topsoil_SOCperc_HWSD_Q= st_read(here("Input_data","SOC_topsoil_SOCperc_HWSD_Q.shp"), 
+                                    agr = c(SOC = "constant"))
+SOC_topsoil_SOCperc_HWSD_Q 
+## Spatial Join with SPAM pixels 
+# All SOC points are remained and only in next step summarized
+Sys.time()
+Join_SOC_gaul_EU27_HWSD = st_join(SOC_topsoil_SOCperc_HWSD_Q, gaul_unique_EU27, join = st_intersects, left = T)
+Sys.time()
+# 
+# intersection = st_intersection(SOC_topsoil_SOCperc_HWSD_Q,gaul_unique_EU27)# 
+# Join_SOC_gaulLeft_EU27_HWSD = st_join(gaul_unique_EU27, SOC_topsoil_SOCperc_HWSD_Q)
+# plot(Join_pointSOC_gaul_EU27_HWSD)
+# plot(gaul_uniq_EU27)
+
+# Recalulating area - since we do not have point polygons 
+SOC_HWSD = Join_SOC_gaul_EU27_HWSD %>% 
+  # rowwise() %>% 
+  mutate(area = geometry %>%
+           st_zm() %>%
+           st_area())
+# test area
+lapland = SOC_HWSD %>% filter(ADM1_NAME == "Lapland") %>% 
+  as_tibble() %>% 
+    summarise(sum(area))
+#Finland Lappland = 403498.752337km2 - h
+
+# Define new factors peat nonpeat based on SOC condition 
+SOC_prop_HWSD = SOC_HWSD %>% 
+  rowwise() %>% 
+  mutate(peat = ifelse(SOC >= 30, "peat", "non_peat")) %>% #rowwise assigning peat non_peat as grouping variabkle
+  group_by(ADM1_NAME, peat) %>% 
+  mutate(area_adm1 = sum(area)) %>% 
+  ungroup() %>% 
+  distinct_at(.vars = 4)
+
+# Here we calulate the share of peat and non peat per adm1 units 
+# computationally intense > takes a few minutes only for EU
+SOC_prop_HWSD1 = SOC_prop_HWSD %>% 
+  group_by(ADM1_NAME, ADM0_NAME, peat) %>% 
+  summarise(area_n_peat = mean(area_adm1))
+
+SOC_prop_HWSD2= SOC_prop_HWSD1 %>%
+  as_tibble() %>%
+  group_by(ADM1_NAME) %>% 
+  mutate(area_peat_prop = (area_n_peat)/sum(area_n_peat)*100) #%>% 
+  # write_sf(here("Input_data", "SOC_perc_adm1_EU27_HWSD.shp"))
+
+gaul_tbl = gaul_unique_EU27 %>% 
+  as_tibble() %>% 
+  dplyr::select(G2008_1_ID, ADM0_NAME, ADM1_NAME)
+
+# Creating a csv file without eometries
+SOC_gaul_HWSD_EU27_PEAT=SOC_prop_HWSD2  %>% 
+  dplyr::select(ADM0_NAME, ADM1_NAME, peat, area_peat_prop) %>% 
+  right_join(.,gaul_tbl, by="ADM1_NAME") %>% 
+  dplyr::select(-G2008_1_ID) %>% 
+  group_by(ADM1_NAME, peat) %>% 
+  distinct() %>% 
+  write_csv(here("Input_data", "SOC_gaul_HWSD_EU27_PEAT.csv")) 
+
+
+
+
+# Join back the ADM0 
+PEAT_gaul_EU27_HWSD = st_join(SOC_prop_HWSD2, gaul_unique_EU27, join = st_intersects, left = T)
+
+
+# write_csv(SOC_prop_HWSD2, here( "Input_data", "SOC_perc_adm1_EU27_HWSD.csv" ))
+peat = read_csv(here("Input_data", "SOC_perc_adm1_EU27_HWSD.csv"))
+
+# Checking the peat data 
+# Checking peat tibble data = area per peat and country
+SOC_gaul_HWSD_EU27_PEAT_check = SOC_gaul_HWSD_EU27_PEAT%>%                              # Applying group_by & summarise
+  group_by(ADM0_NAME.x, peat) %>%
+  summarize(mean(area_peat_prop)) %>% 
+  write_csv(here("Input_data", "SOC_perc_adm0ag_EU27_HWSD.csv"))
+
+
+
+peat2 = peat %>%  
+  group_by(ADM1_NAME, peat) %>% 
+  summarise(area_peat_prop) %>% 
+  st_as_sf() 
+  st_join()
+
+# max peat values. 
+peatmax = peat %>%  
+  filter(peat == "peat") %>% 
+  arrange()
+
+Gaul_area = gaul_unique_EU27 %>%  
+  rowwise() %>% 
+  mutate(area = geometry %>%
+           st_zm() %>%
+           st_area())
+#area finland
+HWSDSOC_area =SOC_topsoil_SOCperc_HWSD_Q%>%  
+  rowwise() %>% 
+  mutate(area = geometry %>%
+           st_zm() %>%
+           st_area())
+#area finland
+
+# Area checked for Lapland finland > real=10000km2. here=98859km2.
+
+# Join data back to 
+
+
+
+# Removing NAs - areas which are not in the area of EU27 show NA
+SOC_GAUL = Join_pointSOC_gaul_EU27 %>% 
+  drop_na() %>% 
+  
+         
+
+
+
+
+write_sf(Join_pointSOC_gaul_EU27, here("Input_data", "Join_pointSOC_gaul_EU27.shp"))
+# Test time: for ~9mio points we need ~3 min
+# start: "2021-01-25 21:58:05 CET"
+# end: "2021-01-25 22:00:53 CET"
+
+## Calculate the summary statistic
+
+
+# HWSD Intersection  ------------------------------------------------------
+gaul_unique_EU27 = read_sf((here("Input_data", "gaul_uniq_EU27.shp")))
+SOC_topsoil_SOCperc_HWSD_Q= st_read(here("Input_data","SOC_topsoil_SOCperc_HWSD_Q.shp"), 
+                                    agr = c(SOC = "constant"))
+intersection_SOC_gaulLeft_EU27_HWSD = st_intersection(st_make_valid(gaul_unique_EU27), st_make_valid(SOC_topsoil_SOCperc_HWSD_Q))
+# area
+SOC_HWSD_intersection = intersection_SOC_gaulLeft_EU27_HWSD %>% 
+  rowwise() %>% 
+  mutate(area = geometry %>%
+           st_zm() %>%
+           st_area())
+
+# 1- Define new factors peat nonpeat based on SOC condition 
+SOC_prop_HWSD_insectn = intersection_SOC_gaulLeft_EU27_HWSD %>% 
+  rowwise() %>% 
+  mutate(peat = ifelse(SOC >= 30, "peat", "non_peat")) #rowwise assigning peat non_peat as grouping variabkle
+  
+# aggregate to adm1 level https://r-spatial.github.io/sf/reference/aggregate.sf.html
+SOC_prop_HWSD_insectn_adm1 = SOC_prop_HWSD_insectn %>% 
+  aggregate(x= .$SOC, by = list(ADM0_NAME, ADM1_NAME, peat), FUN = "mean")
+  
+  group_by(ADM1_NAME, peat) 
+
+
+#%>% 
+  mutate(area_adm1 = sum(area)) %>% 
+  ungroup() %>% 
+  distinct_at(.vars = 4)
+
+
+
+
+# PEAT with zonal statistics  ---------------------------------------------
+# https://stackoverflow.com/questions/47691200/raster-in-r-create-zonal-count-of-specific-cell-values-without-reclassification
+
+# 
+# I would like to know if there is way to create zonal statistics for RasterLayerObjects, specifically the count of a given cell value (e.g. a land-use class) in R without having to reclassify the whole raster. The solution should be memory efficient in order to work on large raster files i.e. no extraction of the values into a matrix in R is desired.
+# 
+# Below an example of how I handle it until now. In this case I reclassify the original raster to hold only 1 for the value of interest and missings for all other values.
+# 
+# My proposed solution creates both, redundant data and additional processing steps to get me to my initial goal. I thought something like zonal(r1[r1==6],r2,"count") would work but obviously it does not (see below).
+
+# generate reproducible Raster 
+library("raster")
+
+## RASTER 1 (e.g. land-use classes)
+r1 <- raster( crs="+proj=utm +zone=31")
+extent(r1) <- extent(0, 100, 0, 100)
+res(r1) <- c(5, 5)
+values(r1) <- sample(10, ncell(r1), replace=TRUE)
+plot(r1)
+
+## RASTER 2 (containing zones of interest)
+r2 <- raster( crs="+proj=utm +zone=31")
+extent(r2) <- extent(0, 100, 0, 100)
+res(r2) <- c(5, 5)
+values(r2) <- c(rep(1,100),rep(2,100),rep(3,100),rep(4,100))
+plot(r2)
+
+# (1) ZONAL STATISTICS
+# a. how many cells per zone (independent of specific cell value)
+zonal(r1,r2,"count")
+
+# b. how many cells per zone of specific value 6
+zonal(r1[r1==6],r2,"count")
+# -> fails
+
+# with reclassification
+r1.reclass<-
+  reclassify(r1,
+             matrix(c(1,5,NA,
+                      5.5,6.5,1, #class of interest 
+                      6.5,10,NA),
+                    ncol=3,
+                    byrow = T),
+             include.lowest=T # include the lowest value from the table.
+  )
+zonal(r1.reclass,r2,"count")
+
+
diff --git a/SPAM_Data_Transformation/SPAM2010_aggregationADM1.R b/SPAM_Data_Transformation/SPAM2010_aggregationADM1.R
new file mode 100644
index 0000000000000000000000000000000000000000..884fce7276b92a27ceb51d3e30f6948cc6d18d7d
--- /dev/null
+++ b/SPAM_Data_Transformation/SPAM2010_aggregationADM1.R
@@ -0,0 +1,818 @@
+easypackages::packages(c("tidyverse", 'sf', 'raster','sp','dplyr','spatialEco',
+                         'exactextractr','tibble', 'here', "readr",
+                         "readxl", "foreign", "data.table", "rbenchmark", "benchmarkme",
+                         "validate", "plyr"))
+
+# Set working directory
+setwd(here::here("Input_data"))
+
+# Load data ----------------------------------------------------------------
+# Yields ------------------------------------------------------------------
+dat_TA_yld <- read.csv("spam2010V2r0_global_Y_TA.csv", locale = locale(encoding = "LATIN2"))# Yields - all technologies together, ie complete crop - Data downloaded on 1. of September 2020
+dat_TI_yld <- read_csv("spam2010V2r0_global_Y_TI.csv", locale = locale(encoding = "LATIN2"))# Yields - Iirrigated portion of crop
+dat_TH_yld <- read_csv("spam2010V2r0_global_Y_TH.csv", locale = locale(encoding = "LATIN2"))# Yields - rainfed high inputs portion of crop
+dat_TL_yld <- read_csv("spam2010V2r0_global_Y_TL.csv", locale = locale(encoding = "LATIN2"))# Yields - rainfed low inputs portion of crop
+dat_TS_yld <- read_csv("spam2010V2r0_global_Y_TS.csv",locale = locale(encoding = "LATIN2"))# Yields - rainfed subsistence portion of crop
+dat_TR_yld <- read_csv("spam2010V2r0_global_Y_TR.csv", locale = locale(encoding = "LATIN2"))# Yields - rainfed portion of crop (= TA - TI, or TH + TL + TS)
+
+# Production
+dat_TA_pr <- read_csv("spam2010V2r0_global_P_TA.csv",locale = locale(encoding = "LATIN2")) # LATIN2 encoding is used for proper display of nordic and slavic names
+dat_TI_pr <- read_csv("spam2010V2r0_global_P_TI.csv",locale = locale(encoding = "LATIN2"))
+dat_TH_pr <- read_csv("spam2010V2r0_global_P_TH.csv",locale = locale(encoding = "LATIN2"))
+dat_TL_pr <- read_csv("spam2010V2r0_global_P_TL.csv",locale = locale(encoding = "LATIN2"))
+dat_TS_pr <- read_csv("spam2010V2r0_global_P_TS.csv",locale = locale(encoding = "LATIN2"))
+dat_TR_pr <- read_csv("spam2010V2r0_global_P_TR.csv",locale = locale(encoding = "LATIN2"))
+
+# Physical area--------------------------------
+dat_TA_pa <- read_csv("spam2010V2r0_global_A_TA.csv",locale = locale(encoding = "LATIN2"))
+dat_TI_pa <- read_csv("spam2010V2r0_global_A_TI.csv",locale = locale(encoding = "LATIN2"))
+dat_TH_pa <- read_csv("spam2010V2r0_global_A_TH.csv",locale = locale(encoding = "LATIN2"))
+dat_TL_pa <- read_csv("spam2010V2r0_global_A_TL.csv",locale = locale(encoding = "LATIN2"))
+dat_TS_pa <- read_csv("spam2010V2r0_global_A_TS.csv",locale = locale(encoding = "LATIN2"))
+dat_TR_pa <- read_csv("spam2010V2r0_global_A_TR.csv",locale = locale(encoding = "LATIN2"))
+
+# Harvesting area----------------------------------
+dat_TA_ha <- read_csv("spam2010V2r0_global_H_TA.csv",locale = locale(encoding = "LATIN2"))
+dat_TI_ha <- read_csv("spam2010V2r0_global_H_TI.csv",locale = locale(encoding = "LATIN2"))
+dat_TH_ha <- read_csv("spam2010V2r0_global_H_TH.csv",locale = locale(encoding = "LATIN2"))
+dat_TL_ha <- read_csv("spam2010V2r0_global_H_TL.csv",locale = locale(encoding = "LATIN2"))
+dat_TS_ha <- read_csv("spam2010V2r0_global_H_TS.csv",locale = locale(encoding = "LATIN2"))
+dat_TR_ha <- read_csv("spam2010V2r0_global_H_TR.csv",locale = locale(encoding = "LATIN2"))
+
+# Create a spatial SPAM file 
+geo_spam=dplyr::select(dat_TA_pr, x,y, cell5m, name_adm1, iso3) 
+# %>% 
+  # distinct_at(vars(name_adm1))
+
+# Make SPAM spatial 
+# projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
+# proj_mercator = "+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
+proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below 
+proj_meter = "+datum=WGS84 +units=m +no_defs"
+geo_spam1 <- st_as_sf(x = geo_spam,                         
+                     coords = c("x", "y"),
+                     crs = proj)
+
+# Load administrative unit shapefile which matches the SPAM adm1 zones
+Admin0 = st_read(here::here("Input_data","GAUL_GADM_SPAM_ULRIKE", "g2008_0_now","g2008_0.shp"))
+Admin1 = st_read(here::here("Input_data","GAUL_GADM_SPAM_ULRIKE", "g2008_1_now","g2008_1.shp")) #this is what we need
+Admin2 = st_read(here::here("Input_data","GAUL_GADM_SPAM_ULRIKE", "g2008_2_2020_11_02","g2008_2.shp")) #this is what we need
+
+# Here I manually adjusted the NAME1 values to the once used in SPAM2010. see Documentation for more. 
+Admin1Adj = st_read(here::here("Input_data","g2008_1_now", "g2008_1.shp"), options = "ENCODING=LATIN2")#WINDOWS-1252
+
+# Now the values from GGS that are still not matching the SPAM2010 adm1 values are recoded
+Admin1Ad2 = Admin1Adj %>% as_tibble() %>% 
+  mutate(NAME1 = case_when(ADM0_CODE == 196 & #Philipines
+                            NAME1 =="Autonomous region in Muslim Mindanao (ARMM)"~ "Autonomous region in Muslim Mi",
+                         TRUE ~ NAME1)) %>%
+  mutate(NAME1 = case_when(ADM0_CODE == 196 & #Philipines
+                           NAME1 =="Region IX (Zamboanga Peninsula)" ~ "Region IX (Zamboanga Peninsula",
+                         TRUE ~ NAME1)) %>% 
+  mutate(NAME1 = case_when(ADM0_CODE == 19 & #Azerbaijan
+                             NAME1 =="Administrative unit not available" ~ "Administrative unit not availa",
+                           TRUE ~ NAME1)) %>% 
+  mutate(NAME1 = case_when(ADM0_CODE == 152 & #Malawi
+                             NAME1 =="Area under National Administration" ~ "Area under National Administra",
+                           TRUE ~ NAME1)) %>% 
+  mutate(NAME1 = case_when(ADM0_CODE == 196 & #Philipines
+                             NAME1 =="Cordillera Administrative region (CAR)" ~ "Cordillera Administrative regi",
+                           TRUE ~ NAME1)) %>% 
+  mutate(NAME1 = case_when(ADM0_CODE == 196 & #Philipines
+                             NAME1 =="Autonomous region in Muslim Mindanao (ARMM)"~ "Autonomous region in Muslim Mi",
+                           TRUE ~ NAME1)) %>%
+  mutate(NAME1 = case_when(ADM0_CODE == 108 & #Haiti
+                             NAME1 =="NOROESTE" ~ "Nord-Est",
+                           TRUE ~ NAME1)) %>% 
+  mutate(NAME1 = case_when(ADM0_CODE == 111 & # Honduras
+                             NAME1 =="Nor Oriental" ~ "Gracias A Dios",
+                           TRUE ~ NAME1))
+
+# issue1: Some country like cote d ivoire are still wrongly written. Check out windows decoding. 
+# Issue2: Check out also where to combine the arable land. 
+
+# ENCODING=WINDOWS-1252
+# Define projection for GAULGADMSPAM
+proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below 
+proj_meter = "+datum=WGS84 +units=m +no_defs"
+Admin1 <- st_as_sf(x = Admin1Adj,                         
+                      # coords = c("x", "y"),
+                      crs = proj)
+
+# Joining SPAM df and GAUL-GADM-SPAM --------------------------------------
+
+# Ulrike's comment: Shape files for GAUL-GADM-SPAM
+# Please note that the columns fips0, fips1 and fips2 are the ones which have the codes for admin level 0 (country), level 1 and level 2 which can be linked to SPAM results.
+# The matching names are in columns Adm0_name, name1 and name2.# 
+# Columns which have names O_fips1, O_fips2, O_name1 and O_name2 refer to an “old” classification system at level 1 and level 2. Its entries differ from the columns without “O_” in some EU countries and a few others where the statistic units did not match the admin units. Please note that the shape files themselves were not modified, only the entries in the dbf tables. 
+
+# adm1 unit match check
+spam_admin_link = 
+  dat_TA_pr%>% dplyr::select(iso3, cell5m, name_cntr, name_adm1) %>% 
+  distinct_at(vars(name_adm1), .keep_all = T) #%>% 
+  full_join(., Admin1Adj, by=c("name_adm1"="NAME1"))
+
+adm1_distinct=Admin1Adj %>% as_tibble() %>% 
+  distinct_at(vars(NAME1), .keep_all = T)
+str(Admin1)
+
+adm1_distinct=as_tibble(Admin1) %>% 
+  distinct_at(vars(FIPS1))#, .keep_all = T)
+
+# adm2 units 
+adm2o_distinct=as_tibble(Admin1) %>% 
+  distinct_at(vars(O_NAME))#, .keep_all = T)
+
+adm2_distinct=as_tibble(Admin1) %>% 
+  distinct_at(vars(FIPS1))#, .keep_all = T)
+
+# Validation - Making sure SPAM2010 matches with admin Shapefiles 
+
+# Adm0 - unique countries 
+# SPAM2010
+# 201 (name_cntr)
+# 201 (iso3)
+
+# ADM1 GAUL_SPAM_GADM
+# > 275 (ADM0_NAME)
+# > 250 (FIPS0)
+# 
+
+# ADM1
+# 1. Same amount of rows when distinct adm1 names? 
+
+# SPAM2010 Admin1 units
+# 2493 (name_adm1)
+
+# ADM1 GAUL_SPAM_GADM
+# 3230 (NAME1)
+# 3079 (O_NAME1
+# 2898 (FIPS1)
+
+# ADM2 GAUL_SPAM_GADM
+#  3080 (O_NAME1)
+#  2758 (NAME1)
+#  3222 (O_FIPS1)
+#  2898 (FIPS1)
+
+
+# How to deal with inconsistency? -----------------------------------------
+# 1. Spatial join 
+# 2. Fulljoin > detect NA's then check what is different
+
+# Spatial join 
+join_SPAM_adm1 = st_join(Admin1, geo_spam1, join = st_intersects, largest = T)
+
+# the final adm1 units are calculated by first full join and then by removing the NAs (=all variables that are not
+# in the SPAM2010 and some that are not represented in the GGS (no shapes available for England subunits for example)).
+# Finally, we have now 2468 adm1 units globally (of initially  2493 adm1 units; Diff: 25 adm1 units). The area of these
+# 25 adm1 units needs to be added to the representative units. Otherwise, the total arable land will be off. TODO! 
+
+# Disinct GGS adjusted
+adm1_distinctAdj=Admin1Adj2 %>% as_tibble() %>% 
+  distinct_at(vars(NAME1))#, .keep_all = T)
+#2,741 rows
+
+# Distinct SPAM2010
+spam_admin_link = dat_TA_pr %>% dplyr::select(iso3, cell5m, name_cntr, name_adm1) %>% 
+  distinct_at(vars(name_adm1))#, .keep_all = T) %>% 
+#2,493 (diff=248)
+
+
+# Left joining the SPAM2010 and GGS adjusted. THe result is not yet matching in terms of rows 
+FINAL_adm1_spam2010_GGS =  dat_TA_pr %>% dplyr::select(iso3, cell5m, name_cntr, name_adm1) %>% 
+  distinct_at(vars(name_adm1), .keep_all = T) #%>% 
+  left_join(., adm1_distinctAdj, by=c("name_adm1" = "NAME1")) %>% 
+  dplyr::select(iso3, cell5m, name_adm1, ADM1_CODE, FIPS1, FIPS0) #%>% 
+   # drop_na()
+
+write_csv(FINAL_adm1_spam2010_GGS, here::here("Input_data", "test67.csv"))
+
+
+# Difference between "FINAL_adm1_spam2010_GGS"  (rows 2493 ) AND  "spam_admin_link" (rows 2468).
+# The difference of 25 rows is due to 8 adm1 district available in the SPAM2010 but not available in the GGS dataset.
+# Further, spelling errors could be a reason as well. 
+
+
+
+# With aggregation long -----------------------------------------------------
+ChangeNames <- function(x) { ## Renaming function
+  crp <- c('whea', 'rice', 'maiz', 'barl', 'pmil', 'smil', 'sorg', 'ocer', 'pota', 'swpo', 'yams', 'cass', 'orts', 'bean', 'chic', 'cowp', 'pige', 'lent', 'opul', 'soyb',
+           'grou', 'cnut', 'oilp', 'sunf', 'rape', 'sesa', 'ooil', 'sugc', 'sugb', 'cott', 'ofib', 'acof', 'rcof', 'coco', 'teas', 'toba', 'bana', 'plnt', 'trof', 'temf', 'vege', 'rest')
+  names(x)[10:51] <- crp
+  return(x)
+} 
+
+GatherFun <- function(k){
+  k %>% gather('crop', 'valu', whea:rest)
+}
+
+# AggregateFun_mean <- function(z){ #for yield
+#   ddply(z, .(name_adm1, crop),
+#         function(z) mean(z$valu[z$valu!=0]))
+# }
+
+# AggregateFun_sum <- function(g){ #for area
+#   ddply(g, .(name_adm1, crop),
+#         function(g) sum(g$valu))
+# }
+
+AggregateFun_sum <- function(g){ #for area
+  g %>%  dplyr::select(iso3, name_adm1, cell5m, rec_type, tech_type, crop, unit, valu) %>%
+    group_by(iso3, name_adm1, crop) %>% 
+    mutate(valu = sum(valu)) %>% 
+    distinct_at(vars(name_adm1), .keep_all = T)
+}
+
+
+CollapseFun <- function(h){
+  h %>% dplyr::distinct_at(vars('name_adm1', 'crop'), .keep_all = TRUE)
+}
+
+CombineFun <- function(d,s){
+  full_join(d,s, by=c("name_adm1", 'crop'))
+}
+
+CleanFun <- function(l){
+l %>% 
+  dplyr::rename(value = V1) %>%dplyr::select(iso3, name_cntr, name_adm1, cell5m, 
+              rec_type, tech_type, crop, unit, valu)
+}
+
+# TransformFun_yld <- function(d){
+#   p <- ChangeNames(d)
+#   l <- GatherFun(p)
+#   r <- AggregateFun_mean(l)
+#   y <- CollapseFun(l)
+#   u <- CombineFun(r,y)
+#   j <- CleanFun(p)
+# }
+
+# TransformFun_area <- function(d){
+p <- ChangeNames(dat_TA_pa)
+l <- GatherFun(p)
+r <- AggregateFun_sum(l)
+y <- CollapseFun(l)
+u <- CombineFun(r,y)
+j <- CleanFun(u)
+# }
+
+
+# Applying transformation to production
+TA_prd <- TransformFun_area(dat_TA_pr)
+TH_prd <- TransformFun_area(dat_TH_pr)
+TI_prd <- TransformFun_area(dat_TI_pr)
+TL_prd <- TransformFun_area(dat_TL_pr)
+TR_prd <- TransformFun_area(dat_TR_pr)
+TS_prd <- TransformFun_area(dat_TS_pr)
+lst.prd <- list(TA_prd, TH_prd, TI_prd, TL_prd, TR_prd, TS_prd)
+dat_prd <- do.call("rbind", lst.prd) # merging all tech types
+write_csv(dat_prd, here::here("Input_data", "production_all_rec_tech_adm1.csv")) 
+
+#Applying transformation to physical_tech types
+TA_phy <- TransformFun_area(dat_TA_pa)
+TH_phy <- TransformFun_area(dat_TH_pa)
+TI_phy <- TransformFun_area(dat_TI_pa)
+TL_phy <- TransformFun_area(dat_TL_pa)
+TR_phy <- TransformFun_area(dat_TR_pa)
+TS_phy <- TransformFun_area(dat_TS_pa)
+lst.phy <- list(TA_phy, TH_phy, TI_phy, TL_phy, TR_phy, TS_phy)
+dat_phy <- do.call("rbind", lst.phy) # merging all tech types
+write_csv(dat_phy, here::here("Input_data", "physicalarea_all_rec_tech_adm1.csv"))
+
+#Applying transformation to physical_tech types
+TA_har <- TransformFun_area(dat_TA_ha)
+TH_har <- TransformFun_area(dat_TH_ha)
+TI_har <- TransformFun_area(dat_TI_ha)
+TL_har <- TransformFun_area(dat_TL_ha)
+TR_har <- TransformFun_area(dat_TR_ha)
+TS_har <- TransformFun_area(dat_TS_ha)
+lst.har <- list(TA_har, TH_har, TI_har, TL_har, TR_har, TS_har)
+dat_har <- do.call("rbind", lst.har) # merging all tech types
+write_csv(dat_har, here::here("Input_data", "harvestedarea_all_rec_tech_adm1.csv"))
+
+# Validating aggregation  -------------------------------------------------
+# 1. Sum up adm1 units and compare if you get same aggregation results  
+# 2. Do this for all rec and tech types 
+dat_har %>%  as_tibble()
+dat_phy %>%  as_tibble()
+dat_prd %>%  as_tibble()
+
+
+
+
+
+
+
+
+# Independent -- Merging treenuts with SPAM -------------------------------
+# LOAD data from previous step (due to memory overuse)
+# setwd(here('Input_data', "Rectypes_full_cropspread"))
+# yld_dat <- read_csv(here::here("Input_data", "harvestedarea_all_rec_tech_adm1.csv")
+prd_dat <- read_csv(here::here("Input_data", "production_all_rec_tech_adm1.csv")) 
+har_dat <- read_csv(here::here("Input_data", "harvestedarea_all_rec_tech_adm1.csv"))
+phy_dat <- read_csv(here::here("Input_data", "physicalarea_all_rec_tech_adm1.csv"))
+
+
+# TREENUTS WITH GADM ------------------------------------------------------
+# # Load and check shapefile 
+# wd=setwd(here("Input_data"))
+# sph.prod = read_sf(dsn = wd, layer = "GADM_treenut_rest_pd")
+# sph.harv = read_sf(dsn = wd, layer = "GADM_treenut_rest_ha")
+# sph.all = st_join(sph.prod, sph.harv, join = st_within)
+# # write_sf(sph.all, dsn = here("Input_data", driver = "ESRI shapefile"))
+# sph.prod.tib = as_tibble(sph.prod)
+# sph.prod.tib = dplyr::select(sph.prod.tib, UID, GID_0, GID_1, trnt_r_)
+# 
+# sph.harv.tib = as_tibble(sph.harv)
+# sph.harv.tib = dplyr::select(sph.harv.tib, UID, GID_0, GID_1, trnt_r_)
+# 
+# # loading SPAM-GADM join
+# spam.gadm.link = read_sf(dsn = here("Input_data", "GADMtoSPAM", "GADM_to_SPAM_within"), 
+#                          layer = "GADM_to_SPAM_within")
+# # 
+# # Treenuts GADM pixel  ---------------------------------------------------------------
+# # CAUTION: THESE FILES ARE WRONG! THE RATIO IS WRONGLY CALULATED (not grouped by adm1 and adm0 level)
+# # Treenuts are loaded from the treenuts script
+# # Production
+# # rbenchmark::benchmark(
+# spam.gadm.link %>% 
+#   as_tibble() %>% 
+#   left_join(.,sph.prod.tib, by="UID") %>% 
+#   left_join(.,prd_dat, by="alloc_key") %>% 
+#   mutate(tnuts = trnt_r_*rest) %>% 
+#   mutate(rest.1 = (1-trnt_r_)*rest) %>% 
+#   dplyr::select(alloc_key, UID, GID_0.x, GID_1.x, rec_type, tech_type, unit,
+#          whea:rest, tnuts, rest.1) %>% 
+#   write.csv(here("Input_data", "prod_tnuts_spam_gadm.csv"))
+# 
+# # 
+# # c=read_csv(here("Input_data", "prod_tnuts_spam_gadm.csv"))
+# # 
+# # Approach validation > do the numbers make sense?
+# # xy=dat_join_prod %>% 
+# #  group_by(tnuts) %>%
+# #   top_n(n=10) %>% 
+# #   dplyr::select(rest, tnuts, rest.1)
+# 
+# # write_csv(dat_join_prod, here("Input_data", "dat_treenut_prod.csv"))
+#   # st_as_sf()
+# 
+# # Harvested area 
+# # rbenchmark::benchmark(
+# spam.gadm.link %>% 
+#     as_tibble() %>% 
+#     left_join(., har_dat, by="alloc_key") %>%
+#     left_join(.,sph.harv.tib, by="UID") %>% 
+#   mutate(tnuts = trnt_r_*rest) %>% 
+#   mutate(rest.1 = (1-trnt_r_)*rest) %>% 
+#   dplyr::select(alloc_key, UID, GID_0.x, GID_1.x, rec_type, tech_type, unit,
+#                 whea:rest, tnuts, rest.1) %>% 
+#   write.csv(here("Input_data", "harv_tnuts_spam_gadm"))
+# # )
+# 
+# # Physical land
+# # rbenchmark::benchmark(
+# spam.gadm.link %>% 
+#   as_tibble() %>% 
+#   left_join(., phy_dat, by="alloc_key") %>%
+#   left_join(.,sph.harv.tib, by="UID") %>% 
+#   mutate(tnuts = trnt_r_*rest) %>% 
+#   mutate(rest.1 = (1-trnt_r_)*rest) %>% 
+#   dplyr::select(alloc_key, UID, GID_0.x, GID_1.x, rec_type, tech_type, unit,
+#                 whea:rest, tnuts, rest.1) %>% 
+#   write.csv(here("Input_data", "phys_tnuts_spam_gadm"))
+# 
+# # Approach: Aim is to get dinal datasets on pixel level > Adm1 level
+# 1_Cbind the three datasets 
+# 2_Gather crops 
+# 3_Spread levels rec type
+# 4_Calculate yields after unit transformation  
+
+# # 1. Load data
+# # Global 
+# dat.phys = read_csv(here("Input_data", "phys_tnuts_spam_gadm.csv"))
+# dat.prod = read_csv(here("Input_data", "prod_tnuts_spam_gadm,csv"))
+# dat.harv = read_csv(here("Input_data", "harv_tnuts_spam_gadm.csv"))
+# 
+# 
+# # lst.tnuts = list(dat.harv, dat.phys, dat.prod)
+# dat_nutsall <- do.call("rbind", list(dat.harv, dat.phys, dat.prod))
+# 
+# # GLobal - Issue: Do this with the new laptop! 
+# # Principle of this function is to not generate any variable as this can lead to overuse of RAM.
+# do.call("rbind", list(read_csv(here("Input_data", "phys_tnuts_spam_gadm")), 
+#                       read_csv(here("Input_data", "prod_tnuts_spam_gadm")), 
+#                       read_csv(here("Input_data", "harv_tnuts_spam_gadm")))) %>%
+#   dplyr::select(-c(rest, X1)) %>%
+#   pivot_longer(names_to = "crops", values_to = "value", cols = whea:rest.1) %>% 
+#   pivot_wider(-unit, names_from = "rec_type", 
+#               values_from = "value" ) %>% 
+#   mutate(Y = (P*1000/H))%>% #checked for result. We are transforming mt to tons when multiplying Producction by 1000 
+#   mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% 
+#   rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% 
+#   write.csv(here("Input_data", "pxl_rec_tec_gadm_spam_tnuts"))
+
+# # SUBSETTING FOR EU-27 GADM ----------------------------------------------------------------------
+# EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA", 
+#             "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR")
+# write.csv(EU_iso3, here("Input_data", "EU27_iso3_lst.csv"))
+# length(EU_iso3) #27 member states > the list comes from gams
+# 
+# # Check if the EU countries iso3 are all present in dat.phys etc.
+# EU_iso3 %in% dat.phys$GID_0.x #Validation 
+# dat.phys.EU = read_csv(here("Input_data", "phys_tnuts_spam_gadm"))
+#   filter(dat.phys, GID_0.x %in% EU_iso3)
+# 
+# dat.prod.EU = read_csv(here("Input_data", "prod_tnuts_spam_gadm")) %>%
+#   filter(dat.prod, GID_0.x %in% EU_iso3)
+# 
+# dat.harv.EU = read_csv(here("Input_data", "harv_tnuts_spam_gadm")) %>% 
+# dat.harv.EU = filter(dat.harv, GID_0.x %in% EU_iso3)
+# 
+# 
+# do.call("rbind", list(read_csv(here("Input_data", "phys_tnuts_spam_gadm")),
+#                       read_csv(here("Input_data", "prod_tnuts_spam_gadm")), 
+#                       read_csv(here("Input_data", "harv_tnuts_spam_gadm")))) %>% 
+#   filter(GID_0.x %in% EU_iso3) %>% 
+#   dplyr::select(-c(rest, X1)) %>%
+#   pivot_longer(names_to = "crops", values_to = "value", cols = whea:rest.1) %>% 
+#   pivot_wider(-unit, names_from = "rec_type", 
+#               values_from = "value" ) %>% 
+#   mutate(Y = (P*1000/H))%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here =300kg/ha) convert mt to kg Issue: Weird as according to the EU stats page aMegatonne, abbreviated as Mt, is a metric unit equivalent to 1 million (106) tonnes, or 1 billion (109) kilograms. 
+#   mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% 
+#   rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% 
+#   write.csv(here("Input_data", "pxl_rec_tec_gadm_spam_tnuts_EU27"))
+# 
+# # Testing the approach
+# {
+# # cbind dfs - testing subset
+# fil.prod = dat.prod %>% top_n(n=20)
+# fil.harv = dat.harv %>% top_n(n=20)
+# fil.phys  = dat.phys %>% top_n(n=20)
+# 
+# lst.tnuts = list(fil.harv, fil.phys, fil.prod)
+# do.call("rbind", list(fil.harv, fil.phys, fil.prod))
+# 
+# long.dat.nuts <- do.call("rbind", list(fil.harv, fil.phys, fil.prod)) %>%
+#   dplyr::select(-c(X1)) %>%
+#   pivot_longer(names_to = "crops", values_to = "value", cols = whea:tnuts) %>% 
+#   pivot_wider(-unit, names_from = "rec_type", 
+#               values_from = "value" ) %>% 
+#   mutate(Y = (P*1000/H))%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here =300kg/ha) convert mt to kg Issue: Weird as according to the EU stats page aMegatonne, abbreviated as Mt, is a metric unit equivalent to 1 million (106) tonnes, or 1 billion (109) kilograms. 
+#   mutate_at(vars(Y), ~replace(., is.nan(.), 0)) #%>% 
+#   # rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% 
+#   write.csv(here("Input_data", "test_rbind_fil"))
+# } 
+
+
+
+# SUBSETTING FOR EU-27 GAUL  ----------------------------------------------------------------------
+EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA", 
+            "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR")
+# write.csv(EU_iso3, here("Input_data", "EU27_iso3_lst.csv"))
+length(EU_iso3) #27 member states > the list comes from gams
+
+# Check if the EU countries iso3 are all present in dat.phys etc.
+# EU_iso3 %in% dat.phys$iso3 #Validation if all EU27 countries are in the dfs below
+dat.phys.EU = read_csv(here("Input_data", "phys_tnuts_rest_spam_gaul_FINAL.csv")) %>% #Pixel level data
+  filter(iso3 %in% EU_iso3) %>% 
+  mutate(alloc_key = str_sub(alloc_key, start=2)) %>% 
+  dplyr::select(-c(X1_1, X1))
+
+
+dat.prod.EU = read_csv(here("Input_data", "prod_tnuts_rest_spam_gaul_FINAL1.csv")) %>% 
+  filter(iso3 %in% EU_iso3)%>% 
+  mutate(alloc_key = str_sub(alloc_key, start=2)) %>% 
+  dplyr::select(-c(X1))
+
+
+dat.harv.EU = read_csv(here("Input_data", "harv_tnuts_rest_spam_gaul_FINAL.csv")) %>% 
+  filter(iso3 %in% EU_iso3) %>% 
+  mutate(alloc_key = str_sub(alloc_key, start=2)) %>% 
+  dplyr::select(-c(X1_1, X1))
+
+
+dat.all.EU=  do.call("rbind", list(dat.phys.EU, dat.prod.EU, dat.harv.EU)) %>% 
+  # filter(iso3 %in% EU_iso3) %>% 
+  # dplyr::select(-c(X1, X1_1)) %>%
+  pivot_longer(names_to = "crops", values_to = "value", cols = whea:tnuts) %>% 
+  pivot_wider(-unit, names_from = "rec_type", 
+              values_from = "value" ) %>% 
+  mutate(Y = (P*1000/H)) #%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here =300kg/ha) convert mt to kg Issue: Weird as according to the EU stats page aMegatonne, abbreviated as Mt, is a metric unit equivalent to 1 million (106) tonnes, or 1 billion (109) kilograms. 
+mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% 
+  # rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% 
+  write_csv(here("Input_data", "pxl_rec_tec_gaul_spam_tnuts_EU_27_corr_prod.csv")) #IMPORTANT dataset. Pixel level. Tnuts.
+dat.all.EU=read_csv(here("Input_data","pxl_rec_tec_gaul_spam_tnuts_EU_27_corr_prod.csv"))
+
+# Testing the approach
+# {
+# # cbind dfs - testing subset
+# fil.prod = dat.prod %>% top_n(n=20)
+# fil.harv = dat.harv %>% top_n(n=20)
+# fil.phys  = dat.phys %>% top_n(n=20)
+# 
+# lst.tnuts = list(fil.harv, fil.phys, fil.prod)
+# x= do.call("rbind", list(fil.harv, fil.phys, fil.prod))
+# 
+# long.dat.nuts <- do.call("rbind", list(fil.harv, fil.phys, fil.prod)) %>%
+#   dplyr::select(-c(X1)) %>%
+#   pivot_longer(names_to = "crops", values_to = "value", cols = whea:rest) %>% 
+#   pivot_wider(-unit, names_from = "rec_type", 
+#               values_from = "value" ) #%>% 
+#   mutate(Y = (P*1000/H))%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here =300kg/ha) convert mt to kg Issue: Weird as according to the EU stats page aMegatonne, abbreviated as Mt, is a metric unit equivalent to 1 million (106) tonnes, or 1 billion (109) kilograms. 
+#   mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% 
+#   rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% 
+#   write.csv(here("Input_data", "test_rbind_fil"))
+# } 
+
+# Crop name mapping -------------------------------------------------------
+# SPAM mapping 
+nam_cat <- read_csv(here("Input_data", 'crop_names.csv')) # Loading crop name sheets - mapping SPAM -- FAO (FROM SPAM)
+nam_cat <- nam_cat %>%  mutate(FAOCODE = as.double(FAOCODE)) %>% 
+  rename(No_spam = names(nam_cat)[1])
+
+GAEZ_SPAM_crp = read_csv(here("Input_data", "6-GAEZ-and-SPAM-crops-correspondence-2015-02-26.csv")) #Mapping GAEZ and SPAM (from SPAM)
+GAEZ_SPAM_crp = GAEZ_SPAM_crp %>% 
+  rename(No_spam = names(.)[4]) %>% 
+  dplyr::select('SPAM crop long name', 'GAEZ crop', No_spam) 
+
+# ORiginal Croplist Modeldat CIFOS
+FAO_CIFOS_crp = read_csv(here("Input_data", "FAO_modeldat_2020.csv"))
+
+# FAO spreaddsheets 
+FAO_stat_control = read_csv(here("Input_data", "FAOSTAT_data_12-26-2020-1.csv"))
+food_cat <- read_csv(here("Input_data",'foodnonfood.csv'))# laoding food usage sheet 
+food_cat = food_cat %>% 
+  rename(`SPAM short name`=Crop_short)
+FAOcntr <- read_csv(here("Input_data", 'FAO_countries.csv')) # loading FAO country codes
+FAOcntr <- dplyr::select(FAOcntr, 'ISO3', 'FAOSTAT_CODE', "GAUL_CODE", 'Continent', 'Subcontinent')
+
+
+# The incomplete mapping of SPAM, GAEZ, FAOstat, FAO_Cifos are made 
+# Not all the crops are matched perfectly. This is made by hand in excel (export-import a chunk below)
+model_dat = left_join(FAO_CIFOS_crp, FAO_stat_control, by="crop") %>%
+  left_join(nam_cat, ., by="FAOCODE") %>%
+  left_join(.,GAEZ_SPAM_crp, by= "No_spam") %>% 
+  write.csv(here("Input_data", "model_crops_incomplete.csv"))
+
+# This data frame is a complete mapping of GAEZ, SPAM and FAO (FAO original and CIFOS model dat FAO crop names)
+# The FAO crop names from the model data differ slightly between Model dat and the fAO crops that I exctracted from FAOSTAT. 
+# Crop others were adjusted according to SPAM indication and the GAEZ mapped crops (cereal others = oats, etc.)
+
+dat_crp_map <- read_csv("model_crops_mapping_GAEZ_SPAM_FAO_complete.csv")
+# dat_crp_map2 <- read_csv(here("Input_data", "model_crops_incomplete.csv")) 
+
+#Validation > Result = Complete match!
+# dat_crp_ma p2$ crop_cifos %in% FAO_CIFOS_crp$crop 
+
+dat_crp_map1 = dat_crp_map %>% 
+  rename(crop_cifos = crop) %>% 
+  left_join(.,food_cat) #adding food categories (food/non-food)
+
+dat_crp_map1$Usage[42]="food" #adding the food category to treenuts 
+write.csv(dat_crp_map1, here("Input_data", "crop_name_mapping_complete.csv")) # this file can be used for further mapping of the crops 
+dat_crp_map=read_csv(here("Input_data", "crop_name_mapping_complete.csv"))# Aggregating to ADM1 level EU -----------------------------------------------
+
+
+# Pxl EU27 data  ----------------------------------------------------------
+
+
+dat.pxl.EU = read_csv(here("Input_data", "pxl_rec_tec_gaul_spam_tnuts_EU_27_corr_prod.csv"))
+dat_TA_yld = read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv")) %>% 
+  dplyr::select(alloc_key,name_cntr,name_adm1, iso3, x, y) %>% 
+  mutate(alloc_key = str_sub(alloc_key, 2))
+
+
+# Mapping countries
+FAOcntr <- read_csv(here("Input_data", 'FAO_countries.csv'))
+FAOcntr <- dplyr::select(FAOcntr, 'ISO3', 'FAOSTAT_CODE', "GAUL_CODE", 'Continent', 'Subcontinent') %>% 
+  rename(iso3=ISO3)
+dat_EU27 = read_csv(here("Input_data", "Country_name_mapping_incomplete.csv"))%>% 
+  left_join(.,FAOcntr, by="iso3")
+
+dat_crop_nam = read_csv(here("Input_data", "crop_name_mapping_complete.csv")) %>% 
+  dplyr::select(-c(X1,X1_1)) %>% 
+  rename(crops = 'SPAM short name')
+
+# Transform pixel data so that it has SPAM variables (adm1 level) 
+
+dat.pixel.EU = dat.pxl.EU %>%
+  mutate_at(vars(alloc_key), funs(as.character)) %>%  
+  left_join(.,dat_TA_yld, by="alloc_key") %>%
+  dplyr::select(alloc_key, iso3.x, name_cntr.x,name_adm1.x, iso3.x, G2008_1_ID, 
+                ADM0_CODE, ADM0_NAME, ADM1_CODE, ADM1_NAME, x, y, crops, tech_type) %>% 
+  rename(iso3=iso3.x,
+         name_cntr=name_cntr.x,
+         name_adm1=name_adm1.x) %>% 
+  left_join(.,dat_crop_nam, by="crops") %>% 
+  left_join(.,dat_EU27, by="iso3") %>% 
+  dplyr::select(G2008_1_ID, tech_type, CIFOS_cntr_nam, crop_cifos, crops) 
+
+# df with prod_level and Cifos country EU
+EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA", 
+            "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR")
+
+dat_cntr_EU = dat.pxl.EU %>%
+  mutate_at(vars(alloc_key), funs(as.character)) %>%
+  left_join(.,dat_TA_yld, by="alloc_key") %>%
+  dplyr::select(iso3.x, name_adm1.x) %>% 
+  dplyr::distinct_at(vars('iso3.x', "name_adm1.x"), .keep_all = F) %>%
+  rename(iso3=iso3.x, 
+         name_adm1=name_adm1.x) %>% 
+  filter(iso3 %in% EU_iso3) %>% 
+  left_join(.,dat_EU27, by="iso3") %>% 
+  dplyr::select(iso3, name_adm1, CIFOS_cntr_nam, Continent, Subcontinent)# %>% 
+dat_cntr_EU %>%  write_csv(here("Input_data", "dat_cntr_EU.csv"))
+
+# rename(
+# iso3=iso3.x, 
+# name_adm1=name_adm1.x)
+
+# df with crops and cifos crops
+dat_crop_nam1 = read_csv(here("Input_data", "crop_name_mapping_complete.csv")) %>% 
+  dplyr::select('SPAM short name', FAONAMES, crop_cifos) %>% 
+  rename(crops = 'SPAM short name')
+
+# Aggregating H,A,P and calculating Yields per Adm1 level 
+dat.adm1.EU = dat.pxl.EU %>%
+  mutate_at(vars(alloc_key), funs(as.character)) %>%
+  #   left_join(., dat.pxl.EU, by="ADM1_NAME") %>% 
+  # mutate_at(vars(alloc_key), funs(as.character)) %>%
+  left_join(.,dat_TA_yld, by="alloc_key") %>%
+  dplyr::select(iso3.x, name_cntr.x,  name_adm1.x, ADM0_NAME, ADM1_NAME, tech_type, crops, A, H, P) %>%
+  group_by(iso3.x, name_adm1.x, tech_type, crops) %>% 
+  summarize(across(.cols = A:P, .fns = sum))%>% #all three rec types are summed per adm1(prod_level) and crop
+  mutate(Y = (P*1000/H))%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here =300kg/ha) convert mt to kg Issue: Weird as according to the EU stats page aMegatonne, abbreviated as Mt, is a metric unit equivalent to 1 million (106) tonnes, or 1 billion (109) kilograms. 
+  mutate_at(vars(Y), ~replace(., is.nan(.), 0)) #%>% 
+
+# This is the sheet that contains the parameters that are important for the GAMS input file
+# It contains yield, harvested and physical area per country, adm1 unit and crop.
+EU27_crp_adm1 = dat.adm1.EU %>% 
+  left_join(.,dat_cntr_EU, by=c("name_adm1.x"="name_adm1")) %>% 
+  filter(iso3 %in% EU_iso3) %>%
+  left_join(.,dat_crop_nam1, by="crops") %>% 
+  dplyr::select(iso3, FAONAMES, Subcontinent, Continent, CIFOS_cntr_nam, name_adm1, crop_cifos, A, H, P, Y) %>% 
+  write.csv(here("Input_data", "EU27_crp_adm1.csv")) # Final crop yield,area,etc. file. 
+
+# Validate
+EU27_crp_adm1 <- read_csv("C:/Users/simon083/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Modified_scripts/R/CifoS_Crop/Input_data/EU27_crp_adm1.csv")
+View(EU27_crp_adm1)
+
+length(unique(EU27_crp_adm1$iso3))
+is.na(EU27_crp_adm1)#Correct! 27 countries present. 
+# Continue: Why is ALB in the final dataset. Check where this went wrong 
+# Validate Yields. Sum the ADM1 by hand. Check if matches 
+# Berechne ob das sinn macht > 319652 rows ?!
+
+
+# Then add again the GADM and get values for the SOC% >> ADd the peat non peat to it. 
+# ADd usage to the sheet (or single sheet)
+#
+
+# Bringing back some SPAM geo-variables to compare them with GADM 
+dat.agg.EU = dat_TA_yld %>% 
+  dplyr::select(iso3, alloc_key, x, y,name_cntr, name_adm1) %>% 
+  right_join(.,dat.pxl.EU, by="alloc_key")
+
+
+
+
+dat.agg.EU %>% dplyr::select(iso3) dat.pxl.EU %>% 
+  # dplyr::select(-Y) #%>% 
+  group_by(GID_1) %>% 
+  summarize
+
+max(dat.pxl.EU$Y)
+summarise()
+
+v <- validator()
+summary(v)
+cf <- confront(dat.pxl.EU, v)
+summary(cf)
+?confront
+
+
+
+# creating a tidy table with all tech variables in one df
+list.comb <- list(yld_dat, prod_dat, har_dat, phy_dat)
+list.comb2=list(dat_har, dat_phy, dat_yld, da)
+dat.comb <- do.call("rbind", list.comb)
+# write.csv(dat.comb, 'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/dat_comb_tidy.csv', row.names=T)
+
+
+
+
+
+# For ArcGIS - area physical per pixel  --------------------------
+
+setwd(here("Input_data"))
+# Physical area--------------------------------
+dat_TA_pa <- read_csv("spam2010V2r0_global_A_TA.csv")
+dat_TA_pa %>% dplyr::select(alloc_key, x, y) %>%write_csv(here("Input_data", "coordinates_SPAM.csv"))
+
+
+Phys_area_full_cropspread <- read_csv("Rectypes_full_cropspread/Phys_area_full_cropspread.csv")
+x=Phys_area_full_cropspread %>% 
+  dplyr::rowwise() %>% 
+  dplyr::mutate(phys_area_pixl = sum(dplyr::c_across(whea:rest), na.rm = T))
+
+x2=x %>% 
+  dplyr::select(alloc_key, x, y, rec_type, tech_type, phys_area_pixl) %>% 
+  group_by(alloc_key) %>% 
+  mutate(sum_phys_area_pixel = sum(phys_area_pixl))
+
+unique_coord = x2 %>% 
+  distinct_at(vars(alloc_key, sum_phys_area_pixel)) %>% 
+  left_join(., Phys_area_full_cropspread, by="alloc_key") %>% 
+  dplyr::select(alloc_key, x, y, sum_phys_area_pixel) %>% 
+  distinct_at(vars(alloc_key, x, y, sum_phys_area_pixel))
+
+write.csv(unique_coord, here("Input_data", "physical_area_per_pixel2.csv"))
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# loading crop usage sheet --------------------------------------------------------------
+setwd(here("Input_dat"))
+food_cat <- read_csv('foodnonfood.csv')
+# Loading crop name sheets
+nam_cat <- read_csv('crop_names.csv')
+# loading FAO country codes
+FAOcntr <- read_csv(here("Input_data", 'FAO_countries.csv'))
+FAOcntr <- dplyr::select(FAOcntr, 'ISO3', 'FAOSTAT_CODE', "GAUL_CODE", 'Continent', 'Subcontinent')
+
+# joining all together
+names(dat.comb)[8] <- "Crop_short"
+names(nam_cat)[2] <- "Crop_short"
+names(dat.comb)[2] <- "ISO3"
+
+join.use <- left_join(dat.comb, food_cat, by="Crop_short")
+join_FAOcntr <- left_join(join.use, FAOcntr, by="ISO3")
+SPAM_dat_tidy <- left_join(join_FAOcntr, nam_cat, by='Crop_short')
+
+
+
+
+
+AggregateFun_mean <- function(z){ #for yield
+  ddply(z, .(name_adm1, crop),
+        function(z) mean(z$valu[z$valu!=0]))
+}
+
+AggregateFun_sum <- function(g){ #for area
+  ddply(g, .(name_adm1, crop),
+        function(g) sum(g$valu))
+}
+
+CollapseFun <- function(h){
+  h %>% dplyr::distinct_at(vars('name_adm1', 'crop'), .keep_all = TRUE)
+}
+
+CombineFun <- function(d,s){
+  full_join(d,s, by=c("name_adm1", 'crop'))
+}
+
+
+
+# rename and reoder dataset columns
+Clean2Fun <- function(x){
+  i <- x %>% dplyr::rename(SPAMcrop_short=Crop_short, 
+                           SPAMcrop_long=`SPAM long name`,
+                           use=Usage) %>%
+    dplyr::select(-c(`No. crt.`, ID))%>%
+    arrange(name_adm1, SPAMcrop_short, rec_type, tech_type)
+  c <- i[c('ISO3', 'name_cntr', 'name_adm1', 'alloc_key', 'FAOSTAT_CODE', 'GAUL_CODE',   'SPAMcrop_short', 'Continent', 'Subcontinent', 'SPAMcrop_long', 'FAONAMES', 'FAOCODE', 'GROUP', 'use', 'rec_type', 'tech_type', 'unit', 'valu')]
+}
+SPAM_dat_tidy <-  Clean2Fun(SPAM_dat_tidy)
+
+# Save file - Option long dataset
+write.csv(SPAM_dat_tidy, here("Input_dat", "Yld_tidy_all.csv"), row.names=T)
+
+# Option2 - Harvested and Physical area horizontal
+SprdFun <- function(x){
+  t <- x %>% dplyr::select(-c(unit))  %>%
+    spread(rec_type, valu)%>% 
+    dplyr::rename(yield_kgha=Y, 
+                  phys_area_ha=A,
+                  harv_area_ha=H)
+  c <- t[c('Continent', 'Subcontinent', 'ISO3', 'name_cntr', 'name_adm1', 'alloc_key', 'FAOSTAT_CODE', 'GAUL_CODE', 'SPAMcrop_short', 'SPAMcrop_long', 'FAONAMES', 'FAOCODE', 'GROUP', 'use', 'tech_type', 'phys_area_ha', 'harv_area_ha', 'yield_kgha')]
+} 
+
+sprd_dat <- SprdFun(SPAM_dat_tidy)
+write.csv(sprd_dat_fixd, here("Input_dat","SPAMspread_all_fixd.csv"), row.names=T)
+
+
diff --git a/SPAM_Data_Transformation/SPAM_no_agg_spatial-L0157316.R b/SPAM_Data_Transformation/SPAM_no_agg_spatial-L0157316.R
new file mode 100644
index 0000000000000000000000000000000000000000..286a1fd1974b2d827ab02b5ff19c0efbe2b4455c
--- /dev/null
+++ b/SPAM_Data_Transformation/SPAM_no_agg_spatial-L0157316.R
@@ -0,0 +1,751 @@
+easypackages::packages(c("tidyverse", 'sf', 'raster','sp','dplyr','spatialEco',
+                         'exactextractr','tibble', 'here', "readr",
+                         "readxl", "foreign", "data.table", "rbenchmark", "benchmarkme",
+                         "validate"))
+setwd(here("Input_data"))
+# here::here()
+##To know the current storage capacity
+memory.limit()
+
+## To increase the storage capacity
+# memory.limit(size=20000)
+gc()
+# Load data ----------------------------------------------------------------
+# Yields ------------------------------------------------------------------
+dat_TA_yld <- read_csv("spam2010V2r0_global_Y_TA.csv")# Yields - all technologies together, ie complete crop - Data downloaded on 1. of September 2020
+dat_TI_yld <- read_csv("spam2010V2r0_global_Y_TI.csv")# Yields - Iirrigated portion of crop
+dat_TH_yld <- read_csv("spam2010V2r0_global_Y_TH.csv")# Yields - rainfed high inputs portion of crop
+dat_TL_yld <- read_csv("spam2010V2r0_global_Y_TL.csv")# Yields - rainfed low inputs portion of crop
+dat_TS_yld <- read_csv("spam2010V2r0_global_Y_TS.csv")# Yields - rainfed subsistence portion of crop
+dat_TR_yld <- read_csv("spam2010V2r0_global_Y_TR.csv")# Yields - rainfed portion of crop (= TA - TI, or TH + TL + TS)
+
+
+# Production
+dat_TA_pr <- read_csv("spam2010V2r0_global_P_TA.csv")
+dat_TI_pr <- read_csv("spam2010V2r0_global_P_TI.csv")
+dat_TH_pr <- read_csv("spam2010V2r0_global_P_TH.csv")
+dat_TL_pr <- read_csv("spam2010V2r0_global_P_TL.csv")
+dat_TS_pr <- read_csv("spam2010V2r0_global_P_TS.csv")
+dat_TR_pr <- read_csv("spam2010V2r0_global_P_TR.csv")
+
+# Physical area--------------------------------
+dat_TA_pa <- read_csv("spam2010V2r0_global_A_TA.csv")
+dat_TI_pa <- read_csv("spam2010V2r0_global_A_TI.csv")
+dat_TH_pa <- read_csv("spam2010V2r0_global_A_TH.csv")
+dat_TL_pa <- read_csv("spam2010V2r0_global_A_TL.csv")
+dat_TS_pa <- read_csv("spam2010V2r0_global_A_TS.csv")
+dat_TR_pa <- read_csv("spam2010V2r0_global_A_TR.csv")
+
+# Harvesting area----------------------------------
+dat_TA_ha <- read_csv("spam2010V2r0_global_H_TA.csv")
+dat_TI_ha <- read_csv("spam2010V2r0_global_H_TI.csv")
+dat_TH_ha <- read_csv("spam2010V2r0_global_H_TH.csv")
+dat_TL_ha <- read_csv("spam2010V2r0_global_H_TL.csv")
+dat_TS_ha <- read_csv("spam2010V2r0_global_H_TS.csv")
+dat_TR_ha <- read_csv("spam2010V2r0_global_H_TR.csv")
+
+# Functions - Yld ---------------------------------------------------------------
+ChangeNames <- function(x) { ## Renaming function
+  crp <- c('whea', 'rice', 'maiz', 'barl', 'pmil', 'smil', 'sorg', 'ocer', 'pota', 'swpo', 'yams', 'cass', 'orts', 'bean', 'chic', 'cowp', 'pige', 'lent', 'opul', 'soyb',
+                  'grou', 'cnut', 'oilp', 'sunf', 'rape', 'sesa', 'ooil', 'sugc', 'sugb', 'cott', 'ofib', 'acof', 'rcof', 'coco', 'teas', 'toba', 'bana', 'plnt', 'trof', 'temf', 'vege', 'rest')
+  names(x)[10:51] <- crp
+  return(x)
+} 
+GatherFun <- function(k){
+  k %>% gather('crop', 'valu', whea:rest)
+}
+# -------------------------------------------
+# If no aggregartion
+AggregateFun_mean <- function(z){ #for yield
+ ddply(z, .(name_adm1, crop),
+        function(z) mean(z$valu[z$valu!=0]))
+}
+
+AggregateFun_sum <- function(g){ #for area
+  ddply(g, .(name_adm1, crop),
+        function(g) sum(g$valu))
+}
+
+CollapseFun <- function(h){
+  h %>% dplyr::distinct_at(vars('name_adm1', 'crop'), .keep_all = TRUE)
+}
+
+CombineFun <- function(d,s){
+  full_join(d,s, by=c("name_adm1", 'crop'))
+}
+# -------------------------
+CleanFun <- function(l){
+  l %>% 
+    # dplyr::rename(
+    #   # value = V1,
+    #   cntr = iso3) %>%
+        dplyr::select(iso3, name_cntr, name_adm1, name_adm2, alloc_key, x, y, rec_type, tech_type, unit, whea:rest)
+      }
+
+TransformFun_yld <- function(d){
+  p <- ChangeNames(d)
+  # l <- GatherFun(p)
+  # r <- AggregateFun_mean(l)
+  # y <- CollapseFun(l)
+  # u <- CombineFun(r,y)
+  j <- CleanFun(p)
+}
+
+TransformFun_area <- function(d){
+  p <- ChangeNames(d)
+  # l <- GatherFun(p)
+  # r <- AggregateFun_sum(l)
+  # y <- CollapseFun(l)
+  # u <- CombineFun(r,y)
+  j <- CleanFun(p)
+}
+
+
+# With aggregation long -----------------------------------------------------
+ChangeNames <- function(x) { ## Renaming function
+  crp <- c('whea', 'rice', 'maiz', 'barl', 'pmil', 'smil', 'sorg', 'ocer', 'pota', 'swpo', 'yams', 'cass', 'orts', 'bean', 'chic', 'cowp', 'pige', 'lent', 'opul', 'soyb',
+           'grou', 'cnut', 'oilp', 'sunf', 'rape', 'sesa', 'ooil', 'sugc', 'sugb', 'cott', 'ofib', 'acof', 'rcof', 'coco', 'teas', 'toba', 'bana', 'plnt', 'trof', 'temf', 'vege', 'rest')
+  names(x)[10:51] <- crp
+  return(x)
+} 
+GatherFun <- function(k){
+  k %>% gather('crop', 'valu', whea:rest)
+}
+
+AggregateFun_mean <- function(z){ #for yield
+  ddply(z, .(name_adm1, crop),
+        function(z) mean(z$valu[z$valu!=0]))
+}
+
+AggregateFun_sum <- function(g){ #for area
+  ddply(g, .(name_adm1, crop),
+        function(g) sum(g$valu))
+}
+
+CollapseFun <- function(h){
+  h %>% dplyr::distinct_at(vars('name_adm1', 'crop'), .keep_all = TRUE)
+}
+
+CombineFun <- function(d,s){
+  full_join(d,s, by=c("name_adm1", 'crop'))
+}
+
+# CleanFun <- function(l){
+  u %>% 
+    dplyr::rename(value = V1) #%>%
+    dplyr::select(iso3, name_cntr, name_adm1, name_adm2, 
+                  alloc_key, x, y, rec_type, tech_type, 
+                  unit, whea:rest)
+}
+
+TransformFun_yld <- function(d){
+  p <- ChangeNames(d)
+  l <- GatherFun(p)
+  r <- AggregateFun_mean(l)
+  y <- CollapseFun(l)
+  u <- CombineFun(r,y)
+  j <- CleanFun(p)
+}
+
+# TransformFun_area <- function(d){
+  p <- ChangeNames(dat_TA_pd)
+  l <- GatherFun(p)
+  r <- AggregateFun_sum(p)
+  y <- CollapseFun(l)
+  u <- CombineFun(r,y)
+  j <- CleanFun(p)
+}
+
+
+# With aggregation l spread crops  ----------------------------------------
+
+
+
+ChangeNames <- function(x) { ## Renaming function
+  crp <- c('whea', 'rice', 'maiz', 'barl', 'pmil', 'smil', 'sorg', 'ocer', 'pota', 'swpo', 'yams', 'cass', 'orts', 'bean', 'chic', 'cowp', 'pige', 'lent', 'opul', 'soyb',
+           'grou', 'cnut', 'oilp', 'sunf', 'rape', 'sesa', 'ooil', 'sugc', 'sugb', 'cott', 'ofib', 'acof', 'rcof', 'coco', 'teas', 'toba', 'bana', 'plnt', 'trof', 'temf', 'vege', 'rest')
+  names(x)[10:51] <- crp
+  return(x)
+} 
+
+GatherFun <- function(k){
+  k %>% gather('crop', 'valu', whea:rest)
+}
+
+# AggregateFun_mean <- function(z){ #for yield
+#   ddply(z, .(name_adm1, crop),
+#         function(z) mean(z$valu[z$valu!=0]))
+# }
+
+
+# }
+
+AggregateFun_sum <- function(g){ #for area
+  ddply(g, .(name_adm1, crop),
+        function(g) sum(g$valu))
+}
+
+CollapseFun <- function(h){
+  h %>% dplyr::distinct_at(vars('name_adm1', 'crop'), .keep_all = TRUE)
+}
+
+CombineFun <- function(d,s){
+  full_join(d,s, by=c("name_adm1", 'crop'))
+}
+
+CleanFun <- function(l){
+l %>% 
+  # dplyr::rename(value = V1) #%>%
+dplyr::select(iso3, name_adm1,
+              alloc_key, x, y, rec_type, tech_type, 
+              unit, whea:rest)
+}
+
+# TransformFun_yld <- function(d){
+  p <- ChangeNames(d)
+  # l <- GatherFun(p)
+  # r <- AggregateFun_mean(l)
+  # y <- CollapseFun(l)
+  # u <- CombineFun(r,y)
+  j <- CleanFun(u)
+# }
+
+TransformFun_area <- function(d){
+p <- ChangeNames(d)
+# l <- GatherFun(p)
+# r <- AggregateFun_sum(p)
+# y <- CollapseFun(l)
+# u <- CombineFun(r,y)
+j <- CleanFun(p)
+}
+
+
+
+# rm(dat_TS_yld)
+
+# Applying transformation to all yld_tech_types
+TA_yld <- TransformFun_area(dat_TA_yld)
+TH_yld <- TransformFun_area(dat_TH_yld)
+TI_yld <- TransformFun_area(dat_TI_yld)
+TL_yld <- TransformFun_area(dat_TL_yld)
+TR_yld <- TransformFun_area(dat_TR_yld)
+TS_yld <- TransformFun_area(dat_TS_yld)
+lst.yld <- list(TA_yld, TH_yld, TI_yld, TL_yld, TR_yld, TS_yld)
+dat_yld <- do.call("rbind", lst.yld) # merging all tech types
+write.csv(dat_yld, 'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/Yield_full_cropspread.csv', row.names=T) 
+
+# Applying transformation to production
+TA_prd <- TransformFun_area(dat_TA_pd)
+TH_prd <- TransformFun_area(dat_TH_pd)
+TI_prd <- TransformFun_area(dat_TI_pd)
+TL_prd <- TransformFun_area(dat_TL_pd)
+TR_prd <- TransformFun_area(dat_TR_pd)
+TS_prd <- TransformFun_area(dat_TS_pd)
+lst.prd <- list(TA_prd, TH_prd, TI_prd, TL_prd, TR_prd, TS_prd)
+dat_prd <- do.call("rbind", lst.prd) # merging all tech types
+write.csv(dat_prd, 'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/Production_full_cropspread.csv', row.names=T) 
+
+#Applying transformation to physical_tech types
+TA_phy <- TransformFun_area(dat_TA_pa)
+TH_phy <- TransformFun_area(dat_TH_pa)
+TI_phy <- TransformFun_area(dat_TI_pa)
+TL_phy <- TransformFun_area(dat_TL_pa)
+TR_phy <- TransformFun_area(dat_TR_pa)
+TS_phy <- TransformFun_area(dat_TS_pa)
+lst.phy <- list(TA_phy, TH_phy, TI_phy, TL_phy, TR_phy, TS_phy)
+dat_phy <- do.call("rbind", lst.phy) # merging all tech types
+write.csv(dat_phy, 'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/Phys_area_full_cropspread.csv', row.names=T) 
+
+#Applying transformation to physical_tech types
+TA_har <- TransformFun_area(dat_TA_ha)
+TH_har <- TransformFun_area(dat_TH_ha)
+TI_har <- TransformFun_area(dat_TI_ha)
+TL_har <- TransformFun_area(dat_TL_ha)
+TR_har <- TransformFun_area(dat_TR_ha)
+TS_har <- TransformFun_area(dat_TS_ha)
+lst.har <- list(TA_har, TH_har, TI_har, TL_har, TR_har, TS_har)
+dat_har <- do.call("rbind", lst.har) # merging all tech types
+write.csv(dat_har, 'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/Harv_area_spatial_spread_SPAM.csv', row.names=T) 
+
+# Independent -- Merging treenuts with SPAM -------------------------------
+# LOAD data from previous step (due to memory overuse)
+setwd(here('Input_data', "Rectypes_full_cropspread"))
+yld_dat <- read_csv('Yield_full_cropspread.csv')
+prd_dat <- read_csv('Production_full_cropspread.csv')
+har_dat <- read_csv('Harv_area_spatial_spread_SPAM.csv')
+phy_dat <- read_csv('Phys_area_full_cropspread.csv')
+
+
+# TREENUTS WITH GADM ------------------------------------------------------
+# # Load and check shapefile 
+# wd=setwd(here("Input_data"))
+# sph.prod = read_sf(dsn = wd, layer = "GADM_treenut_rest_pd")
+# sph.harv = read_sf(dsn = wd, layer = "GADM_treenut_rest_ha")
+# sph.all = st_join(sph.prod, sph.harv, join = st_within)
+# # write_sf(sph.all, dsn = here("Input_data", driver = "ESRI shapefile"))
+# sph.prod.tib = as_tibble(sph.prod)
+# sph.prod.tib = dplyr::select(sph.prod.tib, UID, GID_0, GID_1, trnt_r_)
+# 
+# sph.harv.tib = as_tibble(sph.harv)
+# sph.harv.tib = dplyr::select(sph.harv.tib, UID, GID_0, GID_1, trnt_r_)
+# 
+# # loading SPAM-GADM join
+# spam.gadm.link = read_sf(dsn = here("Input_data", "GADMtoSPAM", "GADM_to_SPAM_within"), 
+#                          layer = "GADM_to_SPAM_within")
+# # 
+# # Treenuts GADM pixel  ---------------------------------------------------------------
+# # CAUTION: THESE FILES ARE WRONG! THE RATIO IS WRONGLY CALULATED (not grouped by adm1 and adm0 level)
+# # Treenuts are loaded from the treenuts script
+# # Production
+# # rbenchmark::benchmark(
+# spam.gadm.link %>% 
+#   as_tibble() %>% 
+#   left_join(.,sph.prod.tib, by="UID") %>% 
+#   left_join(.,prd_dat, by="alloc_key") %>% 
+#   mutate(tnuts = trnt_r_*rest) %>% 
+#   mutate(rest.1 = (1-trnt_r_)*rest) %>% 
+#   dplyr::select(alloc_key, UID, GID_0.x, GID_1.x, rec_type, tech_type, unit,
+#          whea:rest, tnuts, rest.1) %>% 
+#   write.csv(here("Input_data", "prod_tnuts_spam_gadm.csv"))
+# 
+# # 
+# # c=read_csv(here("Input_data", "prod_tnuts_spam_gadm.csv"))
+# # 
+# # Approach validation > do the numbers make sense?
+# # xy=dat_join_prod %>% 
+# #  group_by(tnuts) %>%
+# #   top_n(n=10) %>% 
+# #   dplyr::select(rest, tnuts, rest.1)
+# 
+# # write_csv(dat_join_prod, here("Input_data", "dat_treenut_prod.csv"))
+#   # st_as_sf()
+# 
+# # Harvested area 
+# # rbenchmark::benchmark(
+# spam.gadm.link %>% 
+#     as_tibble() %>% 
+#     left_join(., har_dat, by="alloc_key") %>%
+#     left_join(.,sph.harv.tib, by="UID") %>% 
+#   mutate(tnuts = trnt_r_*rest) %>% 
+#   mutate(rest.1 = (1-trnt_r_)*rest) %>% 
+#   dplyr::select(alloc_key, UID, GID_0.x, GID_1.x, rec_type, tech_type, unit,
+#                 whea:rest, tnuts, rest.1) %>% 
+#   write.csv(here("Input_data", "harv_tnuts_spam_gadm"))
+# # )
+# 
+# # Physical land
+# # rbenchmark::benchmark(
+# spam.gadm.link %>% 
+#   as_tibble() %>% 
+#   left_join(., phy_dat, by="alloc_key") %>%
+#   left_join(.,sph.harv.tib, by="UID") %>% 
+#   mutate(tnuts = trnt_r_*rest) %>% 
+#   mutate(rest.1 = (1-trnt_r_)*rest) %>% 
+#   dplyr::select(alloc_key, UID, GID_0.x, GID_1.x, rec_type, tech_type, unit,
+#                 whea:rest, tnuts, rest.1) %>% 
+#   write.csv(here("Input_data", "phys_tnuts_spam_gadm"))
+# 
+# # Approach: Aim is to get dinal datasets on pixel level > Adm1 level
+# 1_Cbind the three datasets 
+# 2_Gather crops 
+# 3_Spread levels rec type
+# 4_Calculate yields after unit transformation  
+
+# # 1. Load data
+# # Global 
+# dat.phys = read_csv(here("Input_data", "phys_tnuts_spam_gadm.csv"))
+# dat.prod = read_csv(here("Input_data", "prod_tnuts_spam_gadm,csv"))
+# dat.harv = read_csv(here("Input_data", "harv_tnuts_spam_gadm.csv"))
+# 
+# 
+# # lst.tnuts = list(dat.harv, dat.phys, dat.prod)
+# dat_nutsall <- do.call("rbind", list(dat.harv, dat.phys, dat.prod))
+# 
+# # GLobal - Issue: Do this with the new laptop! 
+# # Principle of this function is to not generate any variable as this can lead to overuse of RAM.
+# do.call("rbind", list(read_csv(here("Input_data", "phys_tnuts_spam_gadm")), 
+#                       read_csv(here("Input_data", "prod_tnuts_spam_gadm")), 
+#                       read_csv(here("Input_data", "harv_tnuts_spam_gadm")))) %>%
+#   dplyr::select(-c(rest, X1)) %>%
+#   pivot_longer(names_to = "crops", values_to = "value", cols = whea:rest.1) %>% 
+#   pivot_wider(-unit, names_from = "rec_type", 
+#               values_from = "value" ) %>% 
+#   mutate(Y = (P*1000/H))%>% #checked for result. We are transforming mt to tons when multiplying Producction by 1000 
+#   mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% 
+#   rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% 
+#   write.csv(here("Input_data", "pxl_rec_tec_gadm_spam_tnuts"))
+
+# # SUBSETTING FOR EU-27 GADM ----------------------------------------------------------------------
+# EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA", 
+#             "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR")
+# write.csv(EU_iso3, here("Input_data", "EU27_iso3_lst.csv"))
+# length(EU_iso3) #27 member states > the list comes from gams
+# 
+# # Check if the EU countries iso3 are all present in dat.phys etc.
+# EU_iso3 %in% dat.phys$GID_0.x #Validation 
+# dat.phys.EU = read_csv(here("Input_data", "phys_tnuts_spam_gadm"))
+#   filter(dat.phys, GID_0.x %in% EU_iso3)
+# 
+# dat.prod.EU = read_csv(here("Input_data", "prod_tnuts_spam_gadm")) %>%
+#   filter(dat.prod, GID_0.x %in% EU_iso3)
+# 
+# dat.harv.EU = read_csv(here("Input_data", "harv_tnuts_spam_gadm")) %>% 
+# dat.harv.EU = filter(dat.harv, GID_0.x %in% EU_iso3)
+# 
+# 
+# do.call("rbind", list(read_csv(here("Input_data", "phys_tnuts_spam_gadm")),
+#                       read_csv(here("Input_data", "prod_tnuts_spam_gadm")), 
+#                       read_csv(here("Input_data", "harv_tnuts_spam_gadm")))) %>% 
+#   filter(GID_0.x %in% EU_iso3) %>% 
+#   dplyr::select(-c(rest, X1)) %>%
+#   pivot_longer(names_to = "crops", values_to = "value", cols = whea:rest.1) %>% 
+#   pivot_wider(-unit, names_from = "rec_type", 
+#               values_from = "value" ) %>% 
+#   mutate(Y = (P*1000/H))%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here =300kg/ha) convert mt to kg Issue: Weird as according to the EU stats page aMegatonne, abbreviated as Mt, is a metric unit equivalent to 1 million (106) tonnes, or 1 billion (109) kilograms. 
+#   mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% 
+#   rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% 
+#   write.csv(here("Input_data", "pxl_rec_tec_gadm_spam_tnuts_EU27"))
+# 
+# # Testing the approach
+# {
+# # cbind dfs - testing subset
+# fil.prod = dat.prod %>% top_n(n=20)
+# fil.harv = dat.harv %>% top_n(n=20)
+# fil.phys  = dat.phys %>% top_n(n=20)
+# 
+# lst.tnuts = list(fil.harv, fil.phys, fil.prod)
+# do.call("rbind", list(fil.harv, fil.phys, fil.prod))
+# 
+# long.dat.nuts <- do.call("rbind", list(fil.harv, fil.phys, fil.prod)) %>%
+#   dplyr::select(-c(X1)) %>%
+#   pivot_longer(names_to = "crops", values_to = "value", cols = whea:tnuts) %>% 
+#   pivot_wider(-unit, names_from = "rec_type", 
+#               values_from = "value" ) %>% 
+#   mutate(Y = (P*1000/H))%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here =300kg/ha) convert mt to kg Issue: Weird as according to the EU stats page aMegatonne, abbreviated as Mt, is a metric unit equivalent to 1 million (106) tonnes, or 1 billion (109) kilograms. 
+#   mutate_at(vars(Y), ~replace(., is.nan(.), 0)) #%>% 
+#   # rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% 
+#   write.csv(here("Input_data", "test_rbind_fil"))
+# } 
+
+
+
+
+
+# SUBSETTING FOR EU-27 GAUL  ----------------------------------------------------------------------
+EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA", 
+            "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR")
+# write.csv(EU_iso3, here("Input_data", "EU27_iso3_lst.csv"))
+length(EU_iso3) #27 member states > the list comes from gams
+
+# Check if the EU countries iso3 are all present in dat.phys etc.
+# EU_iso3 %in% dat.phys$iso3 #Validation if all EU27 countries are in the dfs below
+dat.phys.EU = read_csv(here("Input_data", "phys_tnuts_rest_spam_gaul_FINAL.csv")) %>% #Pixel level data
+  filter(iso3 %in% EU_iso3) %>% 
+  mutate(alloc_key = str_sub(alloc_key, start=2)) %>% 
+  dplyr::select(-c(X1_1, X1))
+
+  
+dat.prod.EU = read_csv(here("Input_data", "prod_tnuts_rest_spam_gaul_FINAL1.csv")) %>% 
+filter(iso3 %in% EU_iso3)%>% 
+  mutate(alloc_key = str_sub(alloc_key, start=2)) %>% 
+  dplyr::select(-c(X1))
+
+
+dat.harv.EU = read_csv(here("Input_data", "harv_tnuts_rest_spam_gaul_FINAL.csv")) %>% 
+  filter(iso3 %in% EU_iso3) %>% 
+  mutate(alloc_key = str_sub(alloc_key, start=2)) %>% 
+  dplyr::select(-c(X1_1, X1))
+
+ 
+dat.all.EU=  do.call("rbind", list(dat.phys.EU, dat.prod.EU, dat.harv.EU)) %>% 
+  # filter(iso3 %in% EU_iso3) %>% 
+  # dplyr::select(-c(X1, X1_1)) %>%
+  pivot_longer(names_to = "crops", values_to = "value", cols = whea:tnuts) %>% 
+  pivot_wider(-unit, names_from = "rec_type", 
+              values_from = "value" ) %>% 
+  mutate(Y = (P*1000/H)) #%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here =300kg/ha) convert mt to kg Issue: Weird as according to the EU stats page aMegatonne, abbreviated as Mt, is a metric unit equivalent to 1 million (106) tonnes, or 1 billion (109) kilograms. 
+  mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% 
+  # rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% 
+  write_csv(here("Input_data", "pxl_rec_tec_gaul_spam_tnuts_EU_27_corr_prod.csv")) #IMPORTANT dataset. Pixel level. Tnuts.
+dat.all.EU=read_csv(here("Input_data","pxl_rec_tec_gaul_spam_tnuts_EU_27_corr_prod.csv"))
+
+# Testing the approach
+# {
+  # # cbind dfs - testing subset
+  # fil.prod = dat.prod %>% top_n(n=20)
+  # fil.harv = dat.harv %>% top_n(n=20)
+  # fil.phys  = dat.phys %>% top_n(n=20)
+  # 
+  # lst.tnuts = list(fil.harv, fil.phys, fil.prod)
+  # x= do.call("rbind", list(fil.harv, fil.phys, fil.prod))
+  # 
+  # long.dat.nuts <- do.call("rbind", list(fil.harv, fil.phys, fil.prod)) %>%
+  #   dplyr::select(-c(X1)) %>%
+  #   pivot_longer(names_to = "crops", values_to = "value", cols = whea:rest) %>% 
+  #   pivot_wider(-unit, names_from = "rec_type", 
+  #               values_from = "value" ) #%>% 
+  #   mutate(Y = (P*1000/H))%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here =300kg/ha) convert mt to kg Issue: Weird as according to the EU stats page aMegatonne, abbreviated as Mt, is a metric unit equivalent to 1 million (106) tonnes, or 1 billion (109) kilograms. 
+  #   mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% 
+  #   rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% 
+  #   write.csv(here("Input_data", "test_rbind_fil"))
+  # } 
+
+# Crop name mapping -------------------------------------------------------
+# SPAM mapping 
+nam_cat <- read_csv(here("Input_data", 'crop_names.csv')) # Loading crop name sheets - mapping SPAM -- FAO (FROM SPAM)
+nam_cat <- nam_cat %>%  mutate(FAOCODE = as.double(FAOCODE)) %>% 
+  rename(No_spam = names(nam_cat)[1])
+
+GAEZ_SPAM_crp = read_csv(here("Input_data", "6-GAEZ-and-SPAM-crops-correspondence-2015-02-26.csv")) #Mapping GAEZ and SPAM (from SPAM)
+GAEZ_SPAM_crp = GAEZ_SPAM_crp %>% 
+  rename(No_spam = names(.)[4]) %>% 
+  dplyr::select('SPAM crop long name', 'GAEZ crop', No_spam) 
+
+# ORiginal Croplist Modeldat CIFOS
+FAO_CIFOS_crp = read_csv(here("Input_data", "FAO_modeldat_2020.csv"))
+
+# FAO spreaddsheets 
+FAO_stat_control = read_csv(here("Input_data","FAOSTAT_data_12-26-2020-1.csv"))
+food_cat <- read_csv(here("Input_data",'foodnonfood.csv'))# laoding food usage sheet 
+food_cat = food_cat %>% 
+  rename(`SPAM short name`=Crop_short)
+FAOcntr <- read_csv(here("Input_data",'FAO_countries.csv')) # loading FAO country codes
+FAOcntr <- dplyr::select(FAOcntr, 'ISO3', 'FAOSTAT_CODE', "GAUL_CODE", 'Continent', 'Subcontinent')
+
+
+# The incomplete mapping of SPAM, GAEZ, FAOstat, FAO_Cifos are made 
+# Not all the crops are matched perfectly. This is made by hand in excel (export-import a chunk below)
+model_dat = left_join(FAO_CIFOS_crp, FAO_stat_control, by="crop") %>%
+  left_join(nam_cat, ., by="FAOCODE") %>%
+  left_join(.,GAEZ_SPAM_crp, by= "No_spam") #%>% 
+  write.csv(here("Input_data", "model_crops_incomplete.csv"))
+
+# This data frame is a complete mapping of GAEZ, SPAM and FAO (FAO original and CIFOS model dat FAO crop names)
+# The FAO crop names from the model data differ slightly between Model dat and the fAO crops that I exctracted from FAOSTAT. 
+# Crop others were adjusted according to SPAM indication and the GAEZ mapped crops (cereal others = oats, etc.)
+
+dat_crp_map <- read_csv(here("Input_data","model_crops_mapping_GAEZ_SPAM_FAO_complete.csv"))
+# dat_crp_map2 <- read_csv(here("Input_data", "model_crops_incomplete.csv")) 
+
+#Validation > Result = Complete match!
+# dat_crp_ma p2$ crop_cifos %in% FAO_CIFOS_crp$crop 
+
+dat_crp_map1 = dat_crp_map %>% 
+  rename(crop_cifos = crop) %>% 
+  left_join(.,food_cat) #adding food categories (food/non-food)
+
+dat_crp_map1$Usage[42]="food" #adding the food category to treenuts 
+write.csv(dat_crp_map1, here("Input_data", "crop_name_mapping_complete.csv")) # this file can be used for further mapping of the crops 
+
+# Aggregating to ADM1 level EU -----------------------------------------------
+# Load and transform data 
+dat.pxl.EU = read_csv(here("Input_data", "pxl_rec_tec_gaul_spam_tnuts_EU_27_corr_prod.csv"))
+dat_TA_yld = read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv")) %>% 
+  dplyr::select(alloc_key,name_cntr,name_adm1, iso3, prod_level, x, y) %>% 
+  mutate(alloc_key = str_sub(alloc_key, 2))
+
+
+# Mapping countries
+FAOcntr <- read_csv(here("Input_data", 'FAO_countries.csv'))
+FAOcntr <- dplyr::select(FAOcntr, 'ISO3', 'FAOSTAT_CODE', "GAUL_CODE", 'Continent', 'Subcontinent') %>% 
+  rename(iso3=ISO3)
+dat_EU27 = read_csv(here("Input_data", "Country_name_mapping_incomplete.csv"))%>% 
+  left_join(.,FAOcntr, by="iso3")
+
+dat_crop_nam = read_csv(here("Input_data", "crop_name_mapping_complete.csv")) %>% 
+  dplyr::select(-c(X1,X1_1)) %>% 
+  rename(crops = 'SPAM short name')
+
+# Transform pixel data so that it has SPAM variables (adm1 level) 
+
+dat.pixel.EU = dat.pxl.EU %>%
+  mutate_at(vars(alloc_key), funs(as.character)) %>%  
+  left_join(.,dat_TA_yld, by="alloc_key") %>%
+  dplyr::select(alloc_key, iso3.x, name_cntr.x,name_adm1.x, prod_level, iso3.x, G2008_1_ID, 
+                ADM0_CODE, ADM0_NAME, ADM1_CODE, ADM1_NAME, x, y, crops, tech_type) %>% 
+  rename(iso3=iso3.x,
+         name_cntr=name_cntr.x,
+         name_adm1=name_adm1.x) %>% 
+    left_join(.,dat_crop_nam, by="crops") %>% 
+  left_join(.,dat_EU27, by="iso3") %>% 
+  dplyr::select(prod_level, G2008_1_ID, tech_type, CIFOS_cntr_nam, crop_cifos, crops) 
+
+# df with prod_level and Cifos country EU
+EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA", 
+            "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR")
+
+dat_cntr_EU = dat.pxl.EU %>%
+  mutate_at(vars(alloc_key), funs(as.character)) %>%
+  left_join(.,dat_TA_yld, by="alloc_key") %>%
+  dplyr::select(prod_level, iso3.x, name_adm1.x) %>% 
+  dplyr::distinct_at(vars('prod_level', 'iso3.x', "name_adm1.x"), .keep_all = F) %>%
+  rename(iso3=iso3.x, 
+         name_adm1=name_adm1.x) %>% 
+  filter(iso3 %in% EU_iso3) %>% 
+  left_join(.,dat_EU27, by="iso3") %>% 
+  dplyr::select(prod_level, iso3, name_adm1, CIFOS_cntr_nam, Continent, Subcontinent)# %>% 
+  # rename(
+    # iso3=iso3.x, 
+       # name_adm1=name_adm1.x)
+
+# df with crops and cifos crops
+dat_crop_nam1 = read_csv(here("Input_data", "crop_name_mapping_complete.csv")) %>% 
+  dplyr::select('SPAM short name', FAONAMES, crop_cifos) %>% 
+  rename(crops = 'SPAM short name')
+
+# Aggregating H,A,P and calculating Yields per Adm1 level 
+dat.adm1.EU = dat.pxl.EU %>%
+  mutate_at(vars(alloc_key), funs(as.character)) %>%
+#   left_join(., dat.pxl.EU, by="ADM1_NAME") %>% 
+# mutate_at(vars(alloc_key), funs(as.character)) %>%
+  left_join(.,dat_TA_yld, by="alloc_key") %>%
+  dplyr::select(iso3.x, name_cntr.x,  name_adm1.x, ADM0_NAME, ADM1_NAME, prod_level, tech_type, crops, A, H, P) %>%
+  group_by(prod_level, tech_type, crops) %>% 
+  summarize(across(.cols = A:P, .fns = sum))%>% #all three rec types are summed per adm1(prod_level) and crop
+  mutate(Y = (P*1000/H))%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here =300kg/ha) convert mt to kg Issue: Weird as according to the EU stats page aMegatonne, abbreviated as Mt, is a metric unit equivalent to 1 million (106) tonnes, or 1 billion (109) kilograms. 
+  mutate_at(vars(Y), ~replace(., is.nan(.), 0)) #%>% 
+
+# This is the sheet that contains the parameters that are important for the GAMS input file
+# It contains yield, harvested and physical area per country, adm1 unit and crop.
+EU27_crp_adm1 = dat.adm1.EU %>% 
+  left_join(.,dat_cntr_EU, by="prod_level") %>% 
+  filter(iso3 %in% EU_iso3) %>%
+  left_join(.,dat_crop_nam1, by="crops") %>% 
+  dplyr::select(iso3, FAONAMES, Subcontinent, Continent, CIFOS_cntr_nam, name_adm1, crop_cifos, A, H, P, Y) %>% 
+  write_csv(here("Input_data", "EU27_crp_adm1_gaul.csv")) # Final crop yield,area,etc. file. 
+
+# Final dataset!!
+EU27_crp_adm1 <- read_csv(here("Input_data", "EU27_crp_adm1_gaul.csv"))
+View(EU27_crp_adm1)
+
+length(unique(EU27_crp_adm1$iso3))
+is.na(EU27_crp_adm1)#Correct! 27 countries present. 
+# Continue: Why is ALB in the final dataset. Check where this went wrong 
+# Validate Yields. Sum the ADM1 by hand. Check if matches 
+# Berechne ob das sinn macht > 319652 rows ?!
+
+
+# Then add again the GADM and get values for the SOC% >> ADd the peat non peat to it. 
+# ADd usage to the sheet (or single sheet)
+#
+
+# Bringing back some SPAM geo-variables to compare them with GADM 
+dat.agg.EU = dat_TA_yld %>% 
+  dplyr::select(iso3, prod_level, alloc_key, x, y,name_cntr, name_adm1) %>% 
+  right_join(.,dat.pxl.EU, by="alloc_key")
+  
+
+  
+  
+dat.agg.EU %>% dplyr::select(iso3) dat.pxl.EU %>% 
+  # dplyr::select(-Y) #%>% 
+  group_by(GID_1) %>% 
+  summarize
+
+max(dat.pxl.EU$Y)
+summarise()
+
+v <- validator()
+summary(v)
+cf <- confront(dat.pxl.EU, v)
+summary(cf)
+?confront
+
+
+
+# creating a tidy table with all tech variables in one df
+list.comb <- list(yld_dat, prod_dat, har_dat, phy_dat)
+list.comb2=list(dat_har, dat_phy, dat_yld, da)
+dat.comb <- do.call("rbind", list.comb)
+# write.csv(dat.comb, 'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/dat_comb_tidy.csv', row.names=T)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# loading crop usage sheet --------------------------------------------------------------
+setwd(here("Input_dat"))
+food_cat <- read_csv('foodnonfood.csv')
+# Loading crop name sheets
+nam_cat <- read_csv('crop_names.csv')
+# loading FAO country codes
+FAOcntr <- read_csv(here("Input_data", 'FAO_countries.csv'))
+FAOcntr <- dplyr::select(FAOcntr, 'ISO3', 'FAOSTAT_CODE', "GAUL_CODE", 'Continent', 'Subcontinent')
+
+# joining all together
+names(dat.comb)[8] <- "Crop_short"
+names(nam_cat)[2] <- "Crop_short"
+names(dat.comb)[2] <- "ISO3"
+
+join.use <- left_join(dat.comb, food_cat, by="Crop_short")
+join_FAOcntr <- left_join(join.use, FAOcntr, by="ISO3")
+SPAM_dat_tidy <- left_join(join_FAOcntr, nam_cat, by='Crop_short')
+
+
+
+
+
+AggregateFun_mean <- function(z){ #for yield
+  ddply(z, .(name_adm1, crop),
+        function(z) mean(z$valu[z$valu!=0]))
+}
+
+AggregateFun_sum <- function(g){ #for area
+  ddply(g, .(name_adm1, crop),
+        function(g) sum(g$valu))
+}
+
+CollapseFun <- function(h){
+  h %>% dplyr::distinct_at(vars('name_adm1', 'crop'), .keep_all = TRUE)
+}
+
+CombineFun <- function(d,s){
+  full_join(d,s, by=c("name_adm1", 'crop'))
+}
+
+
+
+# rename and reoder dataset columns
+Clean2Fun <- function(x){
+  i <- x %>% dplyr::rename(SPAMcrop_short=Crop_short, 
+                           SPAMcrop_long=`SPAM long name`,
+                           use=Usage) %>%
+    dplyr::select(-c(`No. crt.`, ID))%>%
+    arrange(name_adm1, SPAMcrop_short, rec_type, tech_type)
+    c <- i[c('ISO3', 'name_cntr', 'name_adm1', 'alloc_key', 'FAOSTAT_CODE', 'GAUL_CODE',   'SPAMcrop_short', 'Continent', 'Subcontinent', 'SPAMcrop_long', 'FAONAMES', 'FAOCODE', 'GROUP', 'use', 'rec_type', 'tech_type', 'unit', 'valu')]
+}
+SPAM_dat_tidy <-  Clean2Fun(SPAM_dat_tidy)
+
+# Save file - Option long dataset
+write.csv(SPAM_dat_tidy, here("Input_dat", "Yld_tidy_all.csv"), row.names=T)
+
+# Option2 - Harvested and Physical area horizontal
+SprdFun <- function(x){
+  t <- x %>% dplyr::select(-c(unit))  %>%
+    spread(rec_type, valu)%>% 
+    dplyr::rename(yield_kgha=Y, 
+                  phys_area_ha=A,
+                  harv_area_ha=H)
+  c <- t[c('Continent', 'Subcontinent', 'ISO3', 'name_cntr', 'name_adm1', 'alloc_key', 'FAOSTAT_CODE', 'GAUL_CODE', 'SPAMcrop_short', 'SPAMcrop_long', 'FAONAMES', 'FAOCODE', 'GROUP', 'use', 'tech_type', 'phys_area_ha', 'harv_area_ha', 'yield_kgha')]
+} 
+
+sprd_dat <- SprdFun(SPAM_dat_tidy)
+write.csv(sprd_dat_fixd, here("Input_dat","SPAMspread_all_fixd.csv"), row.names=T)
+
+
diff --git a/SPAM_Data_Transformation/SPAM_no_agg_spatial_2.R b/SPAM_Data_Transformation/SPAM_no_agg_spatial_2.R
new file mode 100644
index 0000000000000000000000000000000000000000..d8dae4c1f94cbe496141b0fbf46cdf601ecc3770
--- /dev/null
+++ b/SPAM_Data_Transformation/SPAM_no_agg_spatial_2.R
@@ -0,0 +1,782 @@
+easypackages::packages(c("tidyverse", 'sf', 'raster','sp','dplyr','spatialEco',
+                         'exactextractr','tibble', 'here', "readr",
+                         "readxl", "foreign", "data.table", "rbenchmark", "benchmarkme",
+                         "validate"))
+setwd(here("Input_data"))
+# here::here()
+##To know the current storage capacity
+memory.limit()
+
+## To increase the storage capacity
+# memory.limit(size=20000)
+gc()
+# Load data ----------------------------------------------------------------
+# Yields ------------------------------------------------------------------
+dat_TA_yld <- read_csv("spam2010V2r0_global_Y_TA.csv")# Yields - all technologies together, ie complete crop - Data downloaded on 1. of September 2020
+dat_TI_yld <- read_csv("spam2010V2r0_global_Y_TI.csv")# Yields - Iirrigated portion of crop
+dat_TH_yld <- read_csv("spam2010V2r0_global_Y_TH.csv")# Yields - rainfed high inputs portion of crop
+dat_TL_yld <- read_csv("spam2010V2r0_global_Y_TL.csv")# Yields - rainfed low inputs portion of crop
+dat_TS_yld <- read_csv("spam2010V2r0_global_Y_TS.csv")# Yields - rainfed subsistence portion of crop
+dat_TR_yld <- read_csv("spam2010V2r0_global_Y_TR.csv")# Yields - rainfed portion of crop (= TA - TI, or TH + TL + TS)
+
+
+# Production
+dat_TA_pr <- read_csv("spam2010V2r0_global_P_TA.csv")
+dat_TI_pr <- read_csv("spam2010V2r0_global_P_TI.csv")
+dat_TH_pr <- read_csv("spam2010V2r0_global_P_TH.csv")
+dat_TL_pr <- read_csv("spam2010V2r0_global_P_TL.csv")
+dat_TS_pr <- read_csv("spam2010V2r0_global_P_TS.csv")
+dat_TR_pr <- read_csv("spam2010V2r0_global_P_TR.csv")
+
+# Physical area--------------------------------
+dat_TA_pa <- read_csv("spam2010V2r0_global_A_TA.csv")
+dat_TI_pa <- read_csv("spam2010V2r0_global_A_TI.csv")
+dat_TH_pa <- read_csv("spam2010V2r0_global_A_TH.csv")
+dat_TL_pa <- read_csv("spam2010V2r0_global_A_TL.csv")
+dat_TS_pa <- read_csv("spam2010V2r0_global_A_TS.csv")
+dat_TR_pa <- read_csv("spam2010V2r0_global_A_TR.csv")
+
+# Harvesting area----------------------------------
+dat_TA_ha <- read_csv("spam2010V2r0_global_H_TA.csv")
+dat_TI_ha <- read_csv("spam2010V2r0_global_H_TI.csv")
+dat_TH_ha <- read_csv("spam2010V2r0_global_H_TH.csv")
+dat_TL_ha <- read_csv("spam2010V2r0_global_H_TL.csv")
+dat_TS_ha <- read_csv("spam2010V2r0_global_H_TS.csv")
+dat_TR_ha <- read_csv("spam2010V2r0_global_H_TR.csv")
+
+# Functions - Yld ---------------------------------------------------------------
+ChangeNames <- function(x) { ## Renaming function
+  crp <- c('whea', 'rice', 'maiz', 'barl', 'pmil', 'smil', 'sorg', 'ocer', 'pota', 'swpo', 'yams', 'cass', 'orts', 'bean', 'chic', 'cowp', 'pige', 'lent', 'opul', 'soyb',
+           'grou', 'cnut', 'oilp', 'sunf', 'rape', 'sesa', 'ooil', 'sugc', 'sugb', 'cott', 'ofib', 'acof', 'rcof', 'coco', 'teas', 'toba', 'bana', 'plnt', 'trof', 'temf', 'vege', 'rest')
+  names(x)[10:51] <- crp
+  return(x)
+} 
+GatherFun <- function(k){
+  k %>% gather('crop', 'valu', whea:rest)
+}
+# -------------------------------------------
+# If no aggregartion
+AggregateFun_mean <- function(z){ #for yield
+  ddply(z, .(name_adm1, crop),
+        function(z) mean(z$valu[z$valu!=0]))
+}
+
+AggregateFun_sum <- function(g){ #for area
+  ddply(g, .(name_adm1, crop),
+        function(g) sum(g$valu))
+}
+
+CollapseFun <- function(h){
+  h %>% dplyr::distinct_at(vars('name_adm1', 'crop'), .keep_all = TRUE)
+}
+
+CombineFun <- function(d,s){
+  full_join(d,s, by=c("name_adm1", 'crop'))
+}
+# -------------------------
+CleanFun <- function(l){
+  l %>% 
+    # dplyr::rename(
+    #   # value = V1,
+    #   cntr = iso3) %>%
+    dplyr::select(iso3, name_cntr, name_adm1, name_adm2, alloc_key, x, y, rec_type, tech_type, unit, whea:rest)
+}
+
+TransformFun_yld <- function(d){
+  p <- ChangeNames(d)
+  # l <- GatherFun(p)
+  # r <- AggregateFun_mean(l)
+  # y <- CollapseFun(l)
+  # u <- CombineFun(r,y)
+  j <- CleanFun(p)
+}
+
+TransformFun_area <- function(d){
+  p <- ChangeNames(d)
+  # l <- GatherFun(p)
+  # r <- AggregateFun_sum(l)
+  # y <- CollapseFun(l)
+  # u <- CombineFun(r,y)
+  j <- CleanFun(p)
+}
+
+
+# With aggregation long -----------------------------------------------------
+ChangeNames <- function(x) { ## Renaming function
+  crp <- c('whea', 'rice', 'maiz', 'barl', 'pmil', 'smil', 'sorg', 'ocer', 'pota', 'swpo', 'yams', 'cass', 'orts', 'bean', 'chic', 'cowp', 'pige', 'lent', 'opul', 'soyb',
+           'grou', 'cnut', 'oilp', 'sunf', 'rape', 'sesa', 'ooil', 'sugc', 'sugb', 'cott', 'ofib', 'acof', 'rcof', 'coco', 'teas', 'toba', 'bana', 'plnt', 'trof', 'temf', 'vege', 'rest')
+  names(x)[10:51] <- crp
+  return(x)
+} 
+GatherFun <- function(k){
+  k %>% gather('crop', 'valu', whea:rest)
+}
+
+AggregateFun_mean <- function(z){ #for yield
+  ddply(z, .(name_adm1, crop),
+        function(z) mean(z$valu[z$valu!=0]))
+}
+
+AggregateFun_sum <- function(g){ #for area
+  ddply(g, .(name_adm1, crop),
+        function(g) sum(g$valu))
+}
+
+CollapseFun <- function(h){
+  h %>% dplyr::distinct_at(vars('name_adm1', 'crop'), .keep_all = TRUE)
+}
+
+CombineFun <- function(d,s){
+  full_join(d,s, by=c("name_adm1", 'crop'))
+}
+
+# CleanFun <- function(l){
+u %>% 
+  dplyr::rename(value = V1) #%>%
+dplyr::select(iso3, name_cntr, name_adm1, name_adm2, 
+              alloc_key, x, y, rec_type, tech_type, 
+              unit, whea:rest)
+}
+
+TransformFun_yld <- function(d){
+  p <- ChangeNames(d)
+  l <- GatherFun(p)
+  r <- AggregateFun_mean(l)
+  y <- CollapseFun(l)
+  u <- CombineFun(r,y)
+  j <- CleanFun(p)
+}
+
+# TransformFun_area <- function(d){
+p <- ChangeNames(dat_TA_pd)
+l <- GatherFun(p)
+r <- AggregateFun_sum(p)
+y <- CollapseFun(l)
+u <- CombineFun(r,y)
+j <- CleanFun(p)
+}
+
+
+# With aggregation l spread crops  ----------------------------------------
+
+
+
+ChangeNames <- function(x) { ## Renaming function
+  crp <- c('whea', 'rice', 'maiz', 'barl', 'pmil', 'smil', 'sorg', 'ocer', 'pota', 'swpo', 'yams', 'cass', 'orts', 'bean', 'chic', 'cowp', 'pige', 'lent', 'opul', 'soyb',
+           'grou', 'cnut', 'oilp', 'sunf', 'rape', 'sesa', 'ooil', 'sugc', 'sugb', 'cott', 'ofib', 'acof', 'rcof', 'coco', 'teas', 'toba', 'bana', 'plnt', 'trof', 'temf', 'vege', 'rest')
+  names(x)[10:51] <- crp
+  return(x)
+} 
+
+GatherFun <- function(k){
+  k %>% gather('crop', 'valu', whea:rest)
+}
+
+# AggregateFun_mean <- function(z){ #for yield
+#   ddply(z, .(name_adm1, crop),
+#         function(z) mean(z$valu[z$valu!=0]))
+# }
+
+
+# }
+
+AggregateFun_sum <- function(g){ #for area
+  ddply(g, .(name_adm1, crop),
+        function(g) sum(g$valu))
+}
+
+CollapseFun <- function(h){
+  h %>% dplyr::distinct_at(vars('name_adm1', 'crop'), .keep_all = TRUE)
+}
+
+CombineFun <- function(d,s){
+  full_join(d,s, by=c("name_adm1", 'crop'))
+}
+
+CleanFun <- function(l){
+  l %>% 
+    # dplyr::rename(value = V1) #%>%
+    dplyr::select(iso3, name_adm1,
+                  alloc_key, x, y, rec_type, tech_type, 
+                  unit, whea:rest)
+}
+
+# TransformFun_yld <- function(d){
+p <- ChangeNames(d)
+# l <- GatherFun(p)
+# r <- AggregateFun_mean(l)
+# y <- CollapseFun(l)
+# u <- CombineFun(r,y)
+j <- CleanFun(u)
+# }
+
+TransformFun_area <- function(d){
+  p <- ChangeNames(d)
+  # l <- GatherFun(p)
+  # r <- AggregateFun_sum(p)
+  # y <- CollapseFun(l)
+  # u <- CombineFun(r,y)
+  j <- CleanFun(p)
+}
+
+
+
+# rm(dat_TS_yld)
+
+# Applying transformation to all yld_tech_types
+TA_yld <- TransformFun_area(dat_TA_yld)
+TH_yld <- TransformFun_area(dat_TH_yld)
+TI_yld <- TransformFun_area(dat_TI_yld)
+TL_yld <- TransformFun_area(dat_TL_yld)
+TR_yld <- TransformFun_area(dat_TR_yld)
+TS_yld <- TransformFun_area(dat_TS_yld)
+lst.yld <- list(TA_yld, TH_yld, TI_yld, TL_yld, TR_yld, TS_yld)
+dat_yld <- do.call("rbind", lst.yld) # merging all tech types
+write.csv(dat_yld, 'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/Yield_full_cropspread.csv', row.names=T) 
+
+# Applying transformation to production
+TA_prd <- TransformFun_area(dat_TA_pd)
+TH_prd <- TransformFun_area(dat_TH_pd)
+TI_prd <- TransformFun_area(dat_TI_pd)
+TL_prd <- TransformFun_area(dat_TL_pd)
+TR_prd <- TransformFun_area(dat_TR_pd)
+TS_prd <- TransformFun_area(dat_TS_pd)
+lst.prd <- list(TA_prd, TH_prd, TI_prd, TL_prd, TR_prd, TS_prd)
+dat_prd <- do.call("rbind", lst.prd) # merging all tech types
+write.csv(dat_prd, 'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/Production_full_cropspread.csv', row.names=T) 
+
+#Applying transformation to physical_tech types
+TA_phy <- TransformFun_area(dat_TA_pa)
+TH_phy <- TransformFun_area(dat_TH_pa)
+TI_phy <- TransformFun_area(dat_TI_pa)
+TL_phy <- TransformFun_area(dat_TL_pa)
+TR_phy <- TransformFun_area(dat_TR_pa)
+TS_phy <- TransformFun_area(dat_TS_pa)
+lst.phy <- list(TA_phy, TH_phy, TI_phy, TL_phy, TR_phy, TS_phy)
+dat_phy <- do.call("rbind", lst.phy) # merging all tech types
+write.csv(dat_phy, 'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/Phys_area_full_cropspread.csv', row.names=T) 
+
+#Applying transformation to physical_tech types
+TA_har <- TransformFun_area(dat_TA_ha)
+TH_har <- TransformFun_area(dat_TH_ha)
+TI_har <- TransformFun_area(dat_TI_ha)
+TL_har <- TransformFun_area(dat_TL_ha)
+TR_har <- TransformFun_area(dat_TR_ha)
+TS_har <- TransformFun_area(dat_TS_ha)
+lst.har <- list(TA_har, TH_har, TI_har, TL_har, TR_har, TS_har)
+dat_har <- do.call("rbind", lst.har) # merging all tech types
+write.csv(dat_har, 'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/Harv_area_spatial_spread_SPAM.csv', row.names=T) 
+
+# Independent -- Merging treenuts with SPAM -------------------------------
+# LOAD data from previous step (due to memory overuse)
+setwd(here('Input_data', "Rectypes_full_cropspread"))
+yld_dat <- read_csv('Yield_full_cropspread.csv')
+prd_dat <- read_csv('Production_full_cropspread.csv')
+har_dat <- read_csv('Harv_area_spatial_spread_SPAM.csv')
+phy_dat <- read_csv('Phys_area_full_cropspread.csv')
+
+
+# TREENUTS WITH GADM ------------------------------------------------------
+# # Load and check shapefile 
+# wd=setwd(here("Input_data"))
+# sph.prod = read_sf(dsn = wd, layer = "GADM_treenut_rest_pd")
+# sph.harv = read_sf(dsn = wd, layer = "GADM_treenut_rest_ha")
+# sph.all = st_join(sph.prod, sph.harv, join = st_within)
+# # write_sf(sph.all, dsn = here("Input_data", driver = "ESRI shapefile"))
+# sph.prod.tib = as_tibble(sph.prod)
+# sph.prod.tib = dplyr::select(sph.prod.tib, UID, GID_0, GID_1, trnt_r_)
+# 
+# sph.harv.tib = as_tibble(sph.harv)
+# sph.harv.tib = dplyr::select(sph.harv.tib, UID, GID_0, GID_1, trnt_r_)
+# 
+# # loading SPAM-GADM join
+# spam.gadm.link = read_sf(dsn = here("Input_data", "GADMtoSPAM", "GADM_to_SPAM_within"), 
+#                          layer = "GADM_to_SPAM_within")
+# # 
+# # Treenuts GADM pixel  ---------------------------------------------------------------
+# # CAUTION: THESE FILES ARE WRONG! THE RATIO IS WRONGLY CALULATED (not grouped by adm1 and adm0 level)
+# # Treenuts are loaded from the treenuts script
+# # Production
+# # rbenchmark::benchmark(
+# spam.gadm.link %>% 
+#   as_tibble() %>% 
+#   left_join(.,sph.prod.tib, by="UID") %>% 
+#   left_join(.,prd_dat, by="alloc_key") %>% 
+#   mutate(tnuts = trnt_r_*rest) %>% 
+#   mutate(rest.1 = (1-trnt_r_)*rest) %>% 
+#   dplyr::select(alloc_key, UID, GID_0.x, GID_1.x, rec_type, tech_type, unit,
+#          whea:rest, tnuts, rest.1) %>% 
+#   write.csv(here("Input_data", "prod_tnuts_spam_gadm.csv"))
+# 
+# # 
+# # c=read_csv(here("Input_data", "prod_tnuts_spam_gadm.csv"))
+# # 
+# # Approach validation > do the numbers make sense?
+# # xy=dat_join_prod %>% 
+# #  group_by(tnuts) %>%
+# #   top_n(n=10) %>% 
+# #   dplyr::select(rest, tnuts, rest.1)
+# 
+# # write_csv(dat_join_prod, here("Input_data", "dat_treenut_prod.csv"))
+#   # st_as_sf()
+# 
+# # Harvested area 
+# # rbenchmark::benchmark(
+# spam.gadm.link %>% 
+#     as_tibble() %>% 
+#     left_join(., har_dat, by="alloc_key") %>%
+#     left_join(.,sph.harv.tib, by="UID") %>% 
+#   mutate(tnuts = trnt_r_*rest) %>% 
+#   mutate(rest.1 = (1-trnt_r_)*rest) %>% 
+#   dplyr::select(alloc_key, UID, GID_0.x, GID_1.x, rec_type, tech_type, unit,
+#                 whea:rest, tnuts, rest.1) %>% 
+#   write.csv(here("Input_data", "harv_tnuts_spam_gadm"))
+# # )
+# 
+# # Physical land
+# # rbenchmark::benchmark(
+# spam.gadm.link %>% 
+#   as_tibble() %>% 
+#   left_join(., phy_dat, by="alloc_key") %>%
+#   left_join(.,sph.harv.tib, by="UID") %>% 
+#   mutate(tnuts = trnt_r_*rest) %>% 
+#   mutate(rest.1 = (1-trnt_r_)*rest) %>% 
+#   dplyr::select(alloc_key, UID, GID_0.x, GID_1.x, rec_type, tech_type, unit,
+#                 whea:rest, tnuts, rest.1) %>% 
+#   write.csv(here("Input_data", "phys_tnuts_spam_gadm"))
+# 
+# # Approach: Aim is to get dinal datasets on pixel level > Adm1 level
+# 1_Cbind the three datasets 
+# 2_Gather crops 
+# 3_Spread levels rec type
+# 4_Calculate yields after unit transformation  
+
+# # 1. Load data
+# # Global 
+# dat.phys = read_csv(here("Input_data", "phys_tnuts_spam_gadm.csv"))
+# dat.prod = read_csv(here("Input_data", "prod_tnuts_spam_gadm,csv"))
+# dat.harv = read_csv(here("Input_data", "harv_tnuts_spam_gadm.csv"))
+# 
+# 
+# # lst.tnuts = list(dat.harv, dat.phys, dat.prod)
+# dat_nutsall <- do.call("rbind", list(dat.harv, dat.phys, dat.prod))
+# 
+# # GLobal - Issue: Do this with the new laptop! 
+# # Principle of this function is to not generate any variable as this can lead to overuse of RAM.
+# do.call("rbind", list(read_csv(here("Input_data", "phys_tnuts_spam_gadm")), 
+#                       read_csv(here("Input_data", "prod_tnuts_spam_gadm")), 
+#                       read_csv(here("Input_data", "harv_tnuts_spam_gadm")))) %>%
+#   dplyr::select(-c(rest, X1)) %>%
+#   pivot_longer(names_to = "crops", values_to = "value", cols = whea:rest.1) %>% 
+#   pivot_wider(-unit, names_from = "rec_type", 
+#               values_from = "value" ) %>% 
+#   mutate(Y = (P*1000/H))%>% #checked for result. We are transforming mt to tons when multiplying Producction by 1000 
+#   mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% 
+#   rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% 
+#   write.csv(here("Input_data", "pxl_rec_tec_gadm_spam_tnuts"))
+
+# # SUBSETTING FOR EU-27 GADM ----------------------------------------------------------------------
+# EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA", 
+#             "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR")
+# write.csv(EU_iso3, here("Input_data", "EU27_iso3_lst.csv"))
+# length(EU_iso3) #27 member states > the list comes from gams
+# 
+# # Check if the EU countries iso3 are all present in dat.phys etc.
+# EU_iso3 %in% dat.phys$GID_0.x #Validation 
+# dat.phys.EU = read_csv(here("Input_data", "phys_tnuts_spam_gadm"))
+#   filter(dat.phys, GID_0.x %in% EU_iso3)
+# 
+# dat.prod.EU = read_csv(here("Input_data", "prod_tnuts_spam_gadm")) %>%
+#   filter(dat.prod, GID_0.x %in% EU_iso3)
+# 
+# dat.harv.EU = read_csv(here("Input_data", "harv_tnuts_spam_gadm")) %>% 
+# dat.harv.EU = filter(dat.harv, GID_0.x %in% EU_iso3)
+# 
+# 
+# do.call("rbind", list(read_csv(here("Input_data", "phys_tnuts_spam_gadm")),
+#                       read_csv(here("Input_data", "prod_tnuts_spam_gadm")), 
+#                       read_csv(here("Input_data", "harv_tnuts_spam_gadm")))) %>% 
+#   filter(GID_0.x %in% EU_iso3) %>% 
+#   dplyr::select(-c(rest, X1)) %>%
+#   pivot_longer(names_to = "crops", values_to = "value", cols = whea:rest.1) %>% 
+#   pivot_wider(-unit, names_from = "rec_type", 
+#               values_from = "value" ) %>% 
+#   mutate(Y = (P*1000/H))%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here =300kg/ha) convert mt to kg Issue: Weird as according to the EU stats page aMegatonne, abbreviated as Mt, is a metric unit equivalent to 1 million (106) tonnes, or 1 billion (109) kilograms. 
+#   mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% 
+#   rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% 
+#   write.csv(here("Input_data", "pxl_rec_tec_gadm_spam_tnuts_EU27"))
+# 
+# # Testing the approach
+# {
+# # cbind dfs - testing subset
+# fil.prod = dat.prod %>% top_n(n=20)
+# fil.harv = dat.harv %>% top_n(n=20)
+# fil.phys  = dat.phys %>% top_n(n=20)
+# 
+# lst.tnuts = list(fil.harv, fil.phys, fil.prod)
+# do.call("rbind", list(fil.harv, fil.phys, fil.prod))
+# 
+# long.dat.nuts <- do.call("rbind", list(fil.harv, fil.phys, fil.prod)) %>%
+#   dplyr::select(-c(X1)) %>%
+#   pivot_longer(names_to = "crops", values_to = "value", cols = whea:tnuts) %>% 
+#   pivot_wider(-unit, names_from = "rec_type", 
+#               values_from = "value" ) %>% 
+#   mutate(Y = (P*1000/H))%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here =300kg/ha) convert mt to kg Issue: Weird as according to the EU stats page aMegatonne, abbreviated as Mt, is a metric unit equivalent to 1 million (106) tonnes, or 1 billion (109) kilograms. 
+#   mutate_at(vars(Y), ~replace(., is.nan(.), 0)) #%>% 
+#   # rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% 
+#   write.csv(here("Input_data", "test_rbind_fil"))
+# } 
+
+
+
+
+
+# SUBSETTING FOR EU-27 GAUL  ----------------------------------------------------------------------
+EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA", 
+            "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR")
+# write.csv(EU_iso3, here("Input_data", "EU27_iso3_lst.csv"))
+length(EU_iso3) #27 member states > the list comes from gams
+
+# Check if the EU countries iso3 are all present in dat.phys etc.
+# EU_iso3 %in% dat.phys$iso3 #Validation if all EU27 countries are in the dfs below
+dat.phys.EU = read_csv(here("Input_data", "phys_tnuts_rest_spam_gaul_FINAL.csv")) %>% #Pixel level data
+  filter(iso3 %in% EU_iso3) %>% 
+  mutate(alloc_key = str_sub(alloc_key, start=2)) %>% 
+  dplyr::select(-c(X1_1, X1))
+
+
+dat.prod.EU = read_csv(here("Input_data", "prod_tnuts_rest_spam_gaul_FINAL1.csv")) %>% 
+  filter(iso3 %in% EU_iso3)%>% 
+  mutate(alloc_key = str_sub(alloc_key, start=2)) %>% 
+  dplyr::select(-c(X1))
+
+
+dat.harv.EU = read_csv(here("Input_data", "harv_tnuts_rest_spam_gaul_FINAL.csv")) %>% 
+  filter(iso3 %in% EU_iso3) %>% 
+  mutate(alloc_key = str_sub(alloc_key, start=2)) %>% 
+  dplyr::select(-c(X1_1, X1))
+
+
+dat.all.EU=  do.call("rbind", list(dat.phys.EU, dat.prod.EU, dat.harv.EU)) %>% 
+  # filter(iso3 %in% EU_iso3) %>% 
+  # dplyr::select(-c(X1, X1_1)) %>%
+  pivot_longer(names_to = "crops", values_to = "value", cols = whea:tnuts) %>% 
+  pivot_wider(-unit, names_from = "rec_type", 
+              values_from = "value" ) %>% 
+  mutate(Y = (P*1000/H)) #%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here =300kg/ha) convert mt to kg Issue: Weird as according to the EU stats page aMegatonne, abbreviated as Mt, is a metric unit equivalent to 1 million (106) tonnes, or 1 billion (109) kilograms. 
+mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% 
+  # rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% 
+  write_csv(here("Input_data", "pxl_rec_tec_gaul_spam_tnuts_EU_27_corr_prod.csv")) #IMPORTANT dataset. Pixel level. Tnuts.
+dat.all.EU=read_csv(here("Input_data","pxl_rec_tec_gaul_spam_tnuts_EU_27_corr_prod.csv"))
+
+# Testing the approach
+# {
+# # cbind dfs - testing subset
+# fil.prod = dat.prod %>% top_n(n=20)
+# fil.harv = dat.harv %>% top_n(n=20)
+# fil.phys  = dat.phys %>% top_n(n=20)
+# 
+# lst.tnuts = list(fil.harv, fil.phys, fil.prod)
+# x= do.call("rbind", list(fil.harv, fil.phys, fil.prod))
+# 
+# long.dat.nuts <- do.call("rbind", list(fil.harv, fil.phys, fil.prod)) %>%
+#   dplyr::select(-c(X1)) %>%
+#   pivot_longer(names_to = "crops", values_to = "value", cols = whea:rest) %>% 
+#   pivot_wider(-unit, names_from = "rec_type", 
+#               values_from = "value" ) #%>% 
+#   mutate(Y = (P*1000/H))%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here =300kg/ha) convert mt to kg Issue: Weird as according to the EU stats page aMegatonne, abbreviated as Mt, is a metric unit equivalent to 1 million (106) tonnes, or 1 billion (109) kilograms. 
+#   mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% 
+#   rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% 
+#   write.csv(here("Input_data", "test_rbind_fil"))
+# } 
+
+# Crop name mapping -------------------------------------------------------
+# SPAM mapping 
+nam_cat <- read_csv(here("Input_data", 'crop_names.csv')) # Loading crop name sheets - mapping SPAM -- FAO (FROM SPAM)
+nam_cat <- nam_cat %>%  mutate(FAOCODE = as.double(FAOCODE)) %>% 
+  rename(No_spam = names(nam_cat)[1])
+
+GAEZ_SPAM_crp = read_csv(here("Input_data", "6-GAEZ-and-SPAM-crops-correspondence-2015-02-26.csv")) #Mapping GAEZ and SPAM (from SPAM)
+GAEZ_SPAM_crp = GAEZ_SPAM_crp %>% 
+  rename(No_spam = names(.)[4]) %>% 
+  dplyr::select('SPAM crop long name', 'GAEZ crop', No_spam) 
+
+# ORiginal Croplist Modeldat CIFOS
+FAO_CIFOS_crp = read_csv(here("Input_data", "FAO_modeldat_2020.csv"))
+
+# FAO spreaddsheets 
+FAO_stat_control = read_csv(here("Input_data", "FAOSTAT_data_12-26-2020-1.csv"))
+food_cat <- read_csv(here("Input_data",'foodnonfood.csv'))# laoding food usage sheet 
+food_cat = food_cat %>% 
+  rename(`SPAM short name`=Crop_short)
+FAOcntr <- read_csv(here("Input_data", 'FAO_countries.csv')) # loading FAO country codes
+FAOcntr <- dplyr::select(FAOcntr, 'ISO3', 'FAOSTAT_CODE', "GAUL_CODE", 'Continent', 'Subcontinent')
+
+
+# The incomplete mapping of SPAM, GAEZ, FAOstat, FAO_Cifos are made 
+# Not all the crops are matched perfectly. This is made by hand in excel (export-import a chunk below)
+model_dat = left_join(FAO_CIFOS_crp, FAO_stat_control, by="crop") %>%
+  left_join(nam_cat, ., by="FAOCODE") %>%
+  left_join(.,GAEZ_SPAM_crp, by= "No_spam") %>% 
+  write.csv(here("Input_data", "model_crops_incomplete.csv"))
+
+# This data frame is a complete mapping of GAEZ, SPAM and FAO (FAO original and CIFOS model dat FAO crop names)
+# The FAO crop names from the model data differ slightly between Model dat and the fAO crops that I exctracted from FAOSTAT. 
+# Crop others were adjusted according to SPAM indication and the GAEZ mapped crops (cereal others = oats, etc.)
+
+dat_crp_map <- read_csv("model_crops_mapping_GAEZ_SPAM_FAO_complete.csv")
+# dat_crp_map2 <- read_csv(here("Input_data", "model_crops_incomplete.csv")) 
+
+#Validation > Result = Complete match!
+# dat_crp_ma p2$ crop_cifos %in% FAO_CIFOS_crp$crop 
+
+dat_crp_map1 = dat_crp_map %>% 
+  rename(crop_cifos = crop) %>% 
+  left_join(.,food_cat) #adding food categories (food/non-food)
+
+dat_crp_map1$Usage[42]="food" #adding the food category to treenuts 
+write.csv(dat_crp_map1, here("Input_data", "crop_name_mapping_complete.csv")) # this file can be used for further mapping of the crops 
+dat_crp_map=read_csv(here("Input_data", "crop_name_mapping_complete.csv"))# Aggregating to ADM1 level EU -----------------------------------------------
+
+
+# Pxl EU27 data  ----------------------------------------------------------
+
+
+dat.pxl.EU = read_csv(here("Input_data", "pxl_rec_tec_gaul_spam_tnuts_EU_27_corr_prod.csv"))
+dat_TA_yld = read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv")) %>% 
+  dplyr::select(alloc_key,name_cntr,name_adm1, iso3, x, y) %>% 
+  mutate(alloc_key = str_sub(alloc_key, 2))
+
+
+# Mapping countries
+FAOcntr <- read_csv(here("Input_data", 'FAO_countries.csv'))
+FAOcntr <- dplyr::select(FAOcntr, 'ISO3', 'FAOSTAT_CODE', "GAUL_CODE", 'Continent', 'Subcontinent') %>% 
+  rename(iso3=ISO3)
+dat_EU27 = read_csv(here("Input_data", "Country_name_mapping_incomplete.csv"))%>% 
+  left_join(.,FAOcntr, by="iso3")
+
+dat_crop_nam = read_csv(here("Input_data", "crop_name_mapping_complete.csv")) %>% 
+  dplyr::select(-c(X1,X1_1)) %>% 
+  rename(crops = 'SPAM short name')
+
+# Transform pixel data so that it has SPAM variables (adm1 level) 
+
+dat.pixel.EU = dat.pxl.EU %>%
+  mutate_at(vars(alloc_key), funs(as.character)) %>%  
+  left_join(.,dat_TA_yld, by="alloc_key") %>%
+  dplyr::select(alloc_key, iso3.x, name_cntr.x,name_adm1.x, iso3.x, G2008_1_ID, 
+                ADM0_CODE, ADM0_NAME, ADM1_CODE, ADM1_NAME, x, y, crops, tech_type) %>% 
+  rename(iso3=iso3.x,
+         name_cntr=name_cntr.x,
+         name_adm1=name_adm1.x) %>% 
+  left_join(.,dat_crop_nam, by="crops") %>% 
+  left_join(.,dat_EU27, by="iso3") %>% 
+  dplyr::select(G2008_1_ID, tech_type, CIFOS_cntr_nam, crop_cifos, crops) 
+
+# df with prod_level and Cifos country EU
+EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA", 
+            "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR")
+
+dat_cntr_EU = dat.pxl.EU %>%
+  mutate_at(vars(alloc_key), funs(as.character)) %>%
+  left_join(.,dat_TA_yld, by="alloc_key") %>%
+  dplyr::select(iso3.x, name_adm1.x) %>% 
+  dplyr::distinct_at(vars('iso3.x', "name_adm1.x"), .keep_all = F) %>%
+  rename(iso3=iso3.x, 
+         name_adm1=name_adm1.x) %>% 
+  filter(iso3 %in% EU_iso3) %>% 
+  left_join(.,dat_EU27, by="iso3") %>% 
+  dplyr::select(iso3, name_adm1, CIFOS_cntr_nam, Continent, Subcontinent)# %>% 
+dat_cntr_EU %>%  write_csv(here("Input_data", "dat_cntr_EU.csv"))
+
+# rename(
+# iso3=iso3.x, 
+# name_adm1=name_adm1.x)
+
+# df with crops and cifos crops
+dat_crop_nam1 = read_csv(here("Input_data", "crop_name_mapping_complete.csv")) %>% 
+  dplyr::select('SPAM short name', FAONAMES, crop_cifos) %>% 
+  rename(crops = 'SPAM short name')
+
+# Aggregating H,A,P and calculating Yields per Adm1 level 
+dat.adm1.EU = dat.pxl.EU %>%
+  mutate_at(vars(alloc_key), funs(as.character)) %>%
+  #   left_join(., dat.pxl.EU, by="ADM1_NAME") %>% 
+  # mutate_at(vars(alloc_key), funs(as.character)) %>%
+  left_join(.,dat_TA_yld, by="alloc_key") %>%
+  dplyr::select(iso3.x, name_cntr.x,  name_adm1.x, ADM0_NAME, ADM1_NAME, tech_type, crops, A, H, P) %>%
+  group_by(iso3.x, name_adm1.x, tech_type, crops) %>% 
+  summarize(across(.cols = A:P, .fns = sum))%>% #all three rec types are summed per adm1(prod_level) and crop
+  mutate(Y = (P*1000/H))%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here =300kg/ha) convert mt to kg Issue: Weird as according to the EU stats page aMegatonne, abbreviated as Mt, is a metric unit equivalent to 1 million (106) tonnes, or 1 billion (109) kilograms. 
+  mutate_at(vars(Y), ~replace(., is.nan(.), 0)) #%>% 
+
+# This is the sheet that contains the parameters that are important for the GAMS input file
+# It contains yield, harvested and physical area per country, adm1 unit and crop.
+EU27_crp_adm1 = dat.adm1.EU %>% 
+  left_join(.,dat_cntr_EU, by=c("name_adm1.x"="name_adm1")) %>% 
+  filter(iso3 %in% EU_iso3) %>%
+  left_join(.,dat_crop_nam1, by="crops") %>% 
+  dplyr::select(iso3, FAONAMES, Subcontinent, Continent, CIFOS_cntr_nam, name_adm1, crop_cifos, A, H, P, Y) %>% 
+  write.csv(here("Input_data", "EU27_crp_adm1.csv")) # Final crop yield,area,etc. file. 
+
+# Validate
+EU27_crp_adm1 <- read_csv("C:/Users/simon083/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Modified_scripts/R/CifoS_Crop/Input_data/EU27_crp_adm1.csv")
+View(EU27_crp_adm1)
+
+length(unique(EU27_crp_adm1$iso3))
+is.na(EU27_crp_adm1)#Correct! 27 countries present. 
+# Continue: Why is ALB in the final dataset. Check where this went wrong 
+# Validate Yields. Sum the ADM1 by hand. Check if matches 
+# Berechne ob das sinn macht > 319652 rows ?!
+
+
+# Then add again the GADM and get values for the SOC% >> ADd the peat non peat to it. 
+# ADd usage to the sheet (or single sheet)
+#
+
+# Bringing back some SPAM geo-variables to compare them with GADM 
+dat.agg.EU = dat_TA_yld %>% 
+  dplyr::select(iso3, alloc_key, x, y,name_cntr, name_adm1) %>% 
+  right_join(.,dat.pxl.EU, by="alloc_key")
+
+
+
+
+dat.agg.EU %>% dplyr::select(iso3) dat.pxl.EU %>% 
+  # dplyr::select(-Y) #%>% 
+  group_by(GID_1) %>% 
+  summarize
+
+max(dat.pxl.EU$Y)
+summarise()
+
+v <- validator()
+summary(v)
+cf <- confront(dat.pxl.EU, v)
+summary(cf)
+?confront
+
+
+
+# creating a tidy table with all tech variables in one df
+list.comb <- list(yld_dat, prod_dat, har_dat, phy_dat)
+list.comb2=list(dat_har, dat_phy, dat_yld, da)
+dat.comb <- do.call("rbind", list.comb)
+# write.csv(dat.comb, 'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/dat_comb_tidy.csv', row.names=T)
+
+
+
+
+
+# For ArcGIS - area physical per pixel  --------------------------
+
+setwd(here("Input_data"))
+# Physical area--------------------------------
+dat_TA_pa <- read_csv("spam2010V2r0_global_A_TA.csv")
+dat_TA_pa %>% dplyr::select(alloc_key, x, y) %>%write_csv(here("Input_data", "coordinates_SPAM.csv"))
+
+
+Phys_area_full_cropspread <- read_csv("Rectypes_full_cropspread/Phys_area_full_cropspread.csv")
+x=Phys_area_full_cropspread %>% 
+  dplyr::rowwise() %>% 
+  dplyr::mutate(phys_area_pixl = sum(dplyr::c_across(whea:rest), na.rm = T))
+
+x2=x %>% 
+  dplyr::select(alloc_key, x, y, rec_type, tech_type, phys_area_pixl) %>% 
+  group_by(alloc_key) %>% 
+  mutate(sum_phys_area_pixel = sum(phys_area_pixl))
+
+unique_coord = x2 %>% 
+  distinct_at(vars(alloc_key, sum_phys_area_pixel)) %>% 
+  left_join(., Phys_area_full_cropspread, by="alloc_key") %>% 
+  dplyr::select(alloc_key, x, y, sum_phys_area_pixel) %>% 
+  distinct_at(vars(alloc_key, x, y, sum_phys_area_pixel))
+
+write("unique_coord", here("Input_data", "physical_area_per_pixel.csv"))
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# loading crop usage sheet --------------------------------------------------------------
+setwd(here("Input_dat"))
+food_cat <- read_csv('foodnonfood.csv')
+# Loading crop name sheets
+nam_cat <- read_csv('crop_names.csv')
+# loading FAO country codes
+FAOcntr <- read_csv(here("Input_data", 'FAO_countries.csv'))
+FAOcntr <- dplyr::select(FAOcntr, 'ISO3', 'FAOSTAT_CODE', "GAUL_CODE", 'Continent', 'Subcontinent')
+
+# joining all together
+names(dat.comb)[8] <- "Crop_short"
+names(nam_cat)[2] <- "Crop_short"
+names(dat.comb)[2] <- "ISO3"
+
+join.use <- left_join(dat.comb, food_cat, by="Crop_short")
+join_FAOcntr <- left_join(join.use, FAOcntr, by="ISO3")
+SPAM_dat_tidy <- left_join(join_FAOcntr, nam_cat, by='Crop_short')
+
+
+
+
+
+AggregateFun_mean <- function(z){ #for yield
+  ddply(z, .(name_adm1, crop),
+        function(z) mean(z$valu[z$valu!=0]))
+}
+
+AggregateFun_sum <- function(g){ #for area
+  ddply(g, .(name_adm1, crop),
+        function(g) sum(g$valu))
+}
+
+CollapseFun <- function(h){
+  h %>% dplyr::distinct_at(vars('name_adm1', 'crop'), .keep_all = TRUE)
+}
+
+CombineFun <- function(d,s){
+  full_join(d,s, by=c("name_adm1", 'crop'))
+}
+
+
+
+# rename and reoder dataset columns
+Clean2Fun <- function(x){
+  i <- x %>% dplyr::rename(SPAMcrop_short=Crop_short, 
+                           SPAMcrop_long=`SPAM long name`,
+                           use=Usage) %>%
+    dplyr::select(-c(`No. crt.`, ID))%>%
+    arrange(name_adm1, SPAMcrop_short, rec_type, tech_type)
+  c <- i[c('ISO3', 'name_cntr', 'name_adm1', 'alloc_key', 'FAOSTAT_CODE', 'GAUL_CODE',   'SPAMcrop_short', 'Continent', 'Subcontinent', 'SPAMcrop_long', 'FAONAMES', 'FAOCODE', 'GROUP', 'use', 'rec_type', 'tech_type', 'unit', 'valu')]
+}
+SPAM_dat_tidy <-  Clean2Fun(SPAM_dat_tidy)
+
+# Save file - Option long dataset
+write.csv(SPAM_dat_tidy, here("Input_dat", "Yld_tidy_all.csv"), row.names=T)
+
+# Option2 - Harvested and Physical area horizontal
+SprdFun <- function(x){
+  t <- x %>% dplyr::select(-c(unit))  %>%
+    spread(rec_type, valu)%>% 
+    dplyr::rename(yield_kgha=Y, 
+                  phys_area_ha=A,
+                  harv_area_ha=H)
+  c <- t[c('Continent', 'Subcontinent', 'ISO3', 'name_cntr', 'name_adm1', 'alloc_key', 'FAOSTAT_CODE', 'GAUL_CODE', 'SPAMcrop_short', 'SPAMcrop_long', 'FAONAMES', 'FAOCODE', 'GROUP', 'use', 'tech_type', 'phys_area_ha', 'harv_area_ha', 'yield_kgha')]
+} 
+
+sprd_dat <- SprdFun(SPAM_dat_tidy)
+write.csv(sprd_dat_fixd, here("Input_dat","SPAMspread_all_fixd.csv"), row.names=T)
+
+
diff --git a/Treenuts/TreenutsGAUL.R b/Treenuts/TreenutsGAUL.R
new file mode 100644
index 0000000000000000000000000000000000000000..2d1c34659713fbb7694d01e2cd212126e133f707
--- /dev/null
+++ b/Treenuts/TreenutsGAUL.R
@@ -0,0 +1,975 @@
+easypackages::packages(c("tidyverse", 'sf', 'raster','sp','dplyr','spatialEco',
+                         'exactextractr','tibble', 'here', "readr", "margrittr","tydr",
+                         "readxl", "sfheaders"))
+
+# New approach  -----------------------------------------------------------
+# Idea: 
+# 1. Load all the harvested area and yields (GeoTIFF) 
+# 2. Extract to point of the Spam cells
+# 3.       
+# 4. Then look at the proportion between Treenuts and the rest of spam crop others for each pixel
+
+# Zonal statistics  -------------------------------------------------------
+wd=setwd(here("Input_data"))
+
+# Load and check shapefile 
+shape_gadm=read_sf(dsn = wd, layer = "gadm36") # multipolygons containing all admin levels globally
+shape_gadm=dplyr::select(shape_gadm, 'UID', 'geometry', 'GID_0', 'GID_1')
+
+shape_gaul=read_sf(here("Input_data", "g2008_1.shp")) #actual GAUL map 
+shape_gaul=dplyr::select(shape_gaul, 'G2008_1_ID',  'ADM1_CODE', "ADM0_CODE", 'ADM0_NAME', 'ADM1_NAME', "CONTINENT", "REGION", "geometry")
+# class(shape_gadm)
+# shape_fil=filter(shape, GID_0 == 'FIN')
+dat_spam=read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv"))
+dat_spam=dplyr::select(dat_spam, x,y, alloc_key, name_adm1, iso3)
+
+# Make SPAM spatial 
+# projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
+# proj_mercator = "+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
+proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below 
+proj_meter = "+datum=WGS84 +units=m +no_defs"
+dat_cent <- st_as_sf(x = dat_spam,                         
+                     coords = c("x", "y"),
+                     crs = proj)
+
+
+# Load treenuts and rest files 
+{
+# ylds - Nuts
+rast_walnut_yld = raster(here("Input_data","walnut_YieldPerHectare.tif"))
+{
+  
+# class(rast_walnut_yld)
+
+# rast_walnut_withoutNA = rast_walnut_yld
+# # 
+# mean(rast_walnut_yld$walnut_YieldPerHectare)
+# mean(rast_walnut_withoutNA$walnut_YieldPerHectare)
+# rast_walnut_withoutNA[rast_walnut_withoutNA$walnut_YieldPerHectare == 0] <- NA
+# check=as.raster(rast_walnut_withoutNA)
+# 
+# cellStats(rast_walnut_withoutNA$walnut_YieldPerHectare, mean)
+# 
+# cellStats(rast_walnut_withoutNA, mean)
+# cellStats(rast_walnut_yld, mean)
+# rast_walnut_withoutNA$walnut_YieldPerHectare
+}
+rast_almond_yld = raster(here("Input_data","almond_YieldPerHectare.tif"))
+rast_chestnut_yld = raster(here("Input_data","chestnut_YieldPerHectare.tif"))
+rast_cashew_yld = raster(here("Input_data","cashew_YieldPerHectare.tif"))
+rast_pistachio_yld = raster(here("Input_data","pistachio_YieldPerHectare.tif"))
+rast_kolanut_yld = raster(here("Input_data", "kolanut_YieldPerHectare.tif"))
+rast_hazelnut_yld = raster(here("Input_data","hazelnut_YieldPerHectare.tif"))
+rast_brazil_yld = raster(here("Input_data","brazil_YieldPerHectare.tif"))
+rast_areca_yld = raster(here("Input_data","areca_YieldPerHectare.tif"))
+rast_nutnes_yld  = raster(here("Input_data","nutnes_YieldPerHectare.tif"))
+{
+# 0's to Na's
+# rast_almond_yldNA=rast_almond_yld[rast_almond_yld$almond_YieldPerHectare == 0] <- NA
+
+# rast_almond_yld[rast_almond_yld$almond_YieldPerHectare == 0] <- NA
+
+# cellStats(rast_walnut_withoutNA$walnut_YieldPerHectare, min)
+# cellStats(rast_walnut_yld$walnut_YieldPerHectare, min)
+
+# rast_almond_yld
+}
+# yld - Rest
+rast_anise_yld = raster(here("Input_data","aniseetc_YieldPerHectare.tif"))
+rast_chili_yld = raster(here("Input_data","chilleetc_YieldPerHectare.tif"))
+rast_cinnamon_yld = raster(here("Input_data","cinnamon_YieldPerHectare.tif"))
+rast_ginger_yld = raster(here("Input_data",'ginger_YieldPerHectare.tif'))
+rast_hop_yld = raster(here("Input_data",'hop_YieldPerHectare.tif'))
+rast_mate_yld = raster(here("Input_data",'mate_YieldPerHectare.tif'))
+rast_nutmeg_yld = raster(here("Input_data","nutmeg_YieldPerHectare.tif"))
+rast_pepper_yld = raster(here("Input_data",'pepper_YieldPerHectare.tif'))
+rast_peppermint_yld = raster(here("Input_data",'peppermint_YieldPerHectare.tif'))
+rast_pyreth_yld = raster(here("Input_data",'pyrethrum_YieldPerHectare.tif'))
+rast_ramie_yld = raster(here("Input_data",'ramie_YieldPerHectare.tif'))
+rast_ruber_yld = raster(here("Input_data",'rubber_YieldPerHectare.tif'))
+rast_spices_yld = raster(here("Input_data",'spicenes_YieldPerHectare.tif'))
+rast_sugarnes_yld = raster(here("Input_data",'sugarnes_YieldPerHectare.tif'))
+rast_vanilla_yld = raster(here("Input_data",'vanilla_YieldPerHectare.tif'))
+
+# Production - treenuts
+
+rast_walnut_pd = raster(here("Input_data","walnut_Production.tif"))
+rast_almond_pd = raster(here("Input_data","almond_Production.tif"))
+rast_chestnut_pd = raster(here("Input_data","chestnut_Production.tif"))
+rast_cashew_pd = raster(here("Input_data","cashew_Production.tif"))
+rast_pistachio_pd = raster(here("Input_data","pistachio_Production.tif"))
+rast_kolanut_pd = raster(here("Input_data", "kolanut_Production.tif"))
+rast_hazelnut_pd = raster(here("Input_data","hazelnut_Production.tif"))
+rast_brazil_pd = raster(here("Input_data","brazil_Production.tif"))
+rast_areca_pd = raster(here("Input_data","areca_Production.tif"))
+rast_nutnes_pd  = raster(here("Input_data","nutnes_Production.tif"))
+# Production  -rest
+rast_anise_pd = raster(here("Input_data","aniseetc_Production.tif"))
+rast_chili_pd = raster(here("Input_data","chilleetc_Production.tif"))
+rast_cinnamon_pd = raster(here("Input_data","cinnamon_Production.tif"))
+rast_ginger_pd = raster(here("Input_data",'ginger_Production.tif'))
+rast_hop_pd = raster(here("Input_data",'hop_Production.tif'))
+rast_mate_pd = raster(here("Input_data",'mate_Production.tif'))
+rast_nutmeg_pd = raster(here("Input_data","nutmeg_Production.tif"))
+rast_pepper_pd = raster(here("Input_data",'pepper_Production.tif'))
+rast_peppermint_pd = raster(here("Input_data",'peppermint_Production.tif'))
+rast_pyreth_pd = raster(here("Input_data",'pyrethrum_Production.tif'))
+rast_ramie_pd = raster(here("Input_data",'ramie_Production.tif'))
+rast_ruber_pd = raster(here("Input_data",'rubber_Production.tif'))
+rast_spices_pd = raster(here("Input_data",'spicenes_Production.tif'))
+rast_sugarnes_pd = raster(here("Input_data",'sugarnes_Production.tif'))
+rast_vanilla_pd = raster(here("Input_data",'vanilla_Production.tif'))
+
+
+# harvested area - treentus 
+rast_walnut_ha = raster(here("Input_data","walnut_HarvestedAreaHectares.tif"))
+rast_almond_ha = raster(here("Input_data","almond_HarvestedAreaHectares.tif"))
+rast_chestnut_ha = raster(here("Input_data","chestnut_HarvestedAreaHectares.tif"))
+rast_cashew_ha = raster(here("Input_data","cashew_HarvestedAreaHectares.tif"))
+rast_pistachio_ha = raster(here("Input_data","pistachio_HarvestedAreaHectares.tif"))
+rast_kolanut_ha = raster(here("Input_data", "kolanut_HarvestedAreaHectares.tif"))
+rast_hazelnut_ha = raster(here("Input_data","hazelnut_HarvestedAreaHectares.tif"))
+rast_brazil_ha = raster(here("Input_data","brazil_HarvestedAreaHectares.tif"))
+rast_areca_ha = raster(here("Input_data","areca_HarvestedAreaHectares.tif"))
+rast_nutnes_ha  = raster(here("Input_data","nutnes_HarvestedAreaHectares.tif"))
+# HarvestedAreaHectares  -rest
+rast_anise_ha = raster(here("Input_data","aniseetc_HarvestedAreaHectares.tif"))
+rast_chili_ha = raster(here("Input_data","chilleetc_HarvestedAreaHectares.tif"))
+rast_cinnamon_ha = raster(here("Input_data","cinnamon_HarvestedAreaHectares.tif"))
+rast_ginger_ha = raster(here("Input_data",'ginger_HarvestedAreaHectares.tif'))
+rast_hop_ha = raster(here("Input_data",'hop_HarvestedAreaHectares.tif'))
+rast_mate_ha = raster(here("Input_data",'mate_HarvestedAreaHectares.tif'))
+rast_nutmeg_ha = raster(here("Input_data","nutmeg_HarvestedAreaHectares.tif"))
+rast_pepper_ha = raster(here("Input_data",'pepper_HarvestedAreaHectares.tif'))
+rast_peppermint_ha = raster(here("Input_data",'peppermint_HarvestedAreaHectares.tif'))
+rast_pyreth_ha = raster(here("Input_data",'pyrethrum_HarvestedAreaHectares.tif'))
+rast_ramie_ha = raster(here("Input_data",'ramie_HarvestedAreaHectares.tif'))
+rast_ruber_ha = raster(here("Input_data",'rubber_HarvestedAreaHectares.tif'))
+rast_spices_ha = raster(here("Input_data",'spicenes_HarvestedAreaHectares.tif'))
+rast_sugarnes_ha = raster(here("Input_data",'sugarnes_HarvestedAreaHectares.tif'))
+rast_vanilla_ha = raster(here("Input_data",'vanilla_HarvestedAreaHectares.tif'))
+}
+
+# yld_list=list(rast_almond_yld, rast_cashew_yld, rast_chestnut_yld, rast_hazelnut_yld, rast_kolanut_yld, rast_pistachio_yld, rast_walnut_yld)
+# stack_yld=stack(rast_almond_yld, rast_cashew_yld, rast_chestnut_yld, rast_hazelnut_yld, rast_kolanut_yld, rast_pistachio_yld, rast_walnut_yld)
+# stack_ha=stack(rast_almond_ha, rast_cashew_ha, rast_chestnut_ha, rast_hazelnut_ha, rast_kolanut_ha, rast_pistachio_ha, rast_walnut_ha)
+# 
+# dat$mean.nfertilizer_global
+# summary(dat$mean.nfertilizer_global)
+# 
+
+# Perform zonal statistics ------------------------------------------------
+# FunZone_yld=function(rast,shape, na.rm=T){ # raster and vector
+#   c=zonal.stats(shape, rast, stats = c("Fun_nzmean")) 
+#   c$UID <- shape$UID  # assigning the identifier of the original df to the extracted df
+#   merge(shape, c, by.x = 'UID', by.y = 'UID') # merge the two files together
+# }
+# GADM
+FunZone_sum=function(rast){ # raster and vector
+  c=zonal.stats(shape, rast, stats = c("sum")) 
+  c$UID <- shape$UID  # assigning the identifier of the original df to the extracted df
+  dat <- merge(shape, c, by.x = 'UID', by.y = 'UID') # merge the two files together
+}
+
+# GAUL
+FunZone_sum=function(rast){ # raster and vector
+  c=zonal.stats(shape_gaul, rast, stats = c("sum")) 
+  c$G2008_1_ID <- shape_gaul$G2008_1_ID  # assigning the identifier of the original df to the extracted df
+  dat <- merge(shape_gaul, c, by.x = 'G2008_1_ID', by.y = 'G2008_1_ID') # merge the two files together
+}
+
+# 
+# original ----------------------------------------------------------------
+FunZone=function(rast, shape, na.rm=T){ # raster and vector
+  c=zonal.stats(shape, rast, stats = c("mean"))
+  c$UID <- shape$UID  # assigning the identifier of the original df to the extracted df
+  dat <- merge(shape, c, by.x = 'UID', by.y = 'UID') # merge the two files togethere
+}
+
+# # test ignoring 0s --------------------------------------------------------
+# nzmean <- function(x) {
+#   zvals <- x==0
+#   if (all(zvals)) 0 else mean(x[!zvals])
+# }
+# 
+# Fun_nzmean=function(r, na.rm=F){
+#   mean(r[r!=0])
+#   }
+# 
+# FunZone_yld_0=function(rast,shape){ # raster and vector
+#   c=zonal.stats(shape, rast, stats = c("nzmean")) 
+#   c$UID <- shape$UID  # assigning the identifier of the original df to the extracted df
+#   merge(shape, c, by.x = 'UID', by.y = 'UID') # merge the two files together
+# }
+# 
+# # Test raster 
+# v2=raster::extract(rast_walnut_yld,shape_fil,fun='Fun_nzmean')
+
+
+# Yields ------------------------------------------------------------------
+# Apply zonal stats to treenuts 
+ex_walnut_yld = FunZone_yld(rast_walnut_yld,shape_fil)
+ex_walnut_yld_0 = FunZone_yld(rast_walnut_yld,shape_fil)
+
+# mean(ex_walnut_yld$mean.walnut_YieldPerHectare)
+# mean(ex_walnut_yld_0$mean.walnut_YieldPerHectare)
+# mean(ex_walnut_withNA_FIn$mean.walnut_YieldPerHectare)
+
+ex_walnut_withNA_FIn = FunZone_yld(rast_walnut_withoutNA,shape_fil)
+ex_walnut_yld$nzmean.walnut_YieldPerHectare
+# max(ex_walnut_withNA_FIn$mean.walnut_YieldPerHectare)
+
+
+ex_almond_yld = FunZone_yld(rast_almond_yld)
+ex_chestnut_yld = FunZone_yld(rast_chestnut_yld)
+ex_cashew_yld = FunZone_yld(rast_cashew_yld)
+ex_pistachio_yld = FunZone_yld(rast_pistachio_yld)
+ex_kolanut_yld = FunZone_yld(rast_kolanut_yld)
+ex_hazelnut_yld = FunZone_yld(rast_hazelnut_yld)
+ex_brazil_yld = FunZone_yld(rast_brazil_yld)
+ex_areca_yld = FunZone_yld(rast_areca_yld)
+ex_nutnes_yld = FunZone_yld(rast_nutnes_yld)
+
+# Apply zonal stats to rest 
+ex_anise_yld = FunZone_yld(rast_anise_yld)
+ex_chili_yld = FunZone_yld(rast_chili_yld)
+ex_cinnamon_yld =FunZone_yld(rast_cinnamon_yld) 
+ex_ginger_yld = FunZone_yld(rast_ginger_yld)
+ex_hop_yld = FunZone_yld(rast_hop_yld)
+ex_mate_yld = FunZone_yld(rast_mate_yld)
+ex_nutmeg_yld = FunZone_yld(rast_nutmeg_yld)
+ex_pepper_yld = FunZone_yld(rast_pepper_yld)
+ex_peppermint_yld = FunZone_yld(rast_peppermint_yld)
+ex_pyreth_yld = FunZone_yld(rast_pyreth_yld)
+ex_ramie_yld = FunZone_yld(rast_ramie_yld)
+ex_ruber_yld = FunZone_yld(rast_ruber_yld)
+ex_spices_yld = FunZone_yld(rast_spices_yld)
+ex_sugarnes_yld = FunZone_yld(rast_sugarnes_yld)
+ex_vanilla_yld = FunZone_yld(rast_vanilla_yld)
+
+# Join  columns (cbind)
+lst.ex.yld = list(ex_walnut_yld,ex_almond_yld,ex_chestnut_yld,ex_cashew_yld,ex_pistachio_yld, ex_kolanut_yld, ex_hazelnut_yld, ex_areca_yld, ex_brazil_yld, ex_nutnes_yld, 
+                  ex_anise_yld, ex_chili_yld, ex_cinnamon_yld, ex_ginger_yld, ex_hop_yld, ex_mate_yld, ex_nutmeg_yld, ex_pepper_yld, ex_peppermint_yld, ex_pyreth_yld, ex_ramie_yld, ex_ruber_yld, ex_spices_yld, ex_sugarnes_yld, ex_vanilla_yld)
+dat_ex.yld = do.call("cbind", lst.ex.yld) # merging all tech types
+# head(dat_ex.yld)
+
+treenut_yld = dat_ex.yld %>% 
+  select(UID, GID_0,GID_1,mean.walnut_YieldPerHectare, 
+       mean.almond_YieldPerHectare,
+       mean.chestnut_YieldPerHectare,
+       mean.cashew_YieldPerHectare,
+       mean.pistachio_YieldPerHectare,
+       mean.kolanut_YieldPerHectare,
+       mean.hazelnut_YieldPerHectare, 
+       mean.areca_YieldPerHectare, 
+       mean.brazil_YieldPerHectare, 
+       mean.nutnes_YieldPerHectare, 
+       geometry) %>% 
+  rowwise()%>%
+  mutate(treenut_yld= rowmean(c_across(mean.almond_YieldPerHectare:mean.areca_YieldPerHectare)))%>%
+  mutate(treenut_yld= rowmean(c_across(mean.anise_YieldPerHectare:mean.vanilla_YieldPerHectare)))
+  
+  
+# class(treenut_yld)
+# write.csv(treenut_yld, 'D:/Raw_dat/Raw_dat/Ramakutty/HarvestedAreaYield175Crops_Geotiff/HarvestedAreaYield175Crops_Geotiff/Treenuts', row.names=T) 
+st_write(treenut_yld, here("Input_data", "GADM_treenut.shp"), driver="ESRI Shapefile")  # create to a shapefile 
+
+
+# Production  -------------------------------------------------------------
+
+ex_walnut_pd = FunZone_sum(rast_walnut_pd)
+ex_almond_pd = FunZone_sum(rast_almond_pd)
+ex_chestnut_pd = FunZone_sum(rast_chestnut_pd)
+ex_cashew_pd = FunZone_sum(rast_cashew_pd)
+ex_pistachio_pd = FunZone_sum(rast_pistachio_pd)
+ex_kolanut_pd = FunZone_sum(rast_kolanut_pd)
+ex_hazelnut_pd = FunZone_sum(rast_hazelnut_pd)
+ex_brazil_pd = FunZone_sum(rast_brazil_pd)
+ex_areca_pd = FunZone_sum(rast_areca_pd)
+ex_nutnes_pd = FunZone_sum(rast_nutnes_pd)
+
+# Rest
+ex_anise_pd = FunZone_sum(rast_anise_pd)
+ex_chili_pd = FunZone_sum(rast_chili_pd)
+ex_cinnamon_pd =FunZone_sum(rast_cinnamon_pd) 
+ex_ginger_pd = FunZone_sum(rast_ginger_pd)
+ex_hop_pd = FunZone_sum(rast_hop_pd)
+ex_mate_pd = FunZone_sum(rast_mate_pd)
+ex_nutmeg_pd = FunZone_sum(rast_nutmeg_pd)
+ex_pepper_pd = FunZone_sum(rast_pepper_pd)
+ex_peppermint_pd = FunZone_sum(rast_peppermint_pd)
+ex_pyreth_pd = FunZone_sum(rast_pyreth_pd)
+ex_ramie_pd = FunZone_sum(rast_ramie_pd)
+ex_ruber_pd = FunZone_sum(rast_ruber_pd)
+ex_spices_pd = FunZone_sum(rast_spices_pd)
+ex_sugarnes_pd = FunZone_sum(rast_sugarnes_pd)
+ex_vanilla_pd = FunZone_sum(rast_vanilla_pd)
+
+# list all treenuts
+lst.ex.pd = list(ex_walnut_pd,ex_almond_pd,ex_chestnut_pd,ex_cashew_pd,ex_pistachio_pd, ex_kolanut_pd, ex_hazelnut_pd, ex_areca_pd, ex_nutnes_pd,
+                 ex_anise_pd, ex_chili_pd, ex_cinnamon_pd, ex_ginger_pd, ex_hop_pd, ex_mate_pd, ex_nutmeg_pd, ex_pepper_pd, ex_peppermint_pd, ex_pyreth_pd, ex_ramie_pd, ex_ruber_pd, ex_spices_pd, ex_sugarnes_pd, ex_vanilla_pd)
+dat_ex.pd = do.call("cbind", lst.ex.pd) # merging all tech types
+
+# list rest
+
+treenut_pd = dat_ex.pd %>% 
+  # dplyr::select(UID, GID_0,GID_1,sum.walnut_Production, 
+    dplyr::select(G2008_1_ID, ADM1_CODE,ADM0_CODE, ADM1_NAME,ADM0_NAME, CONTINENT, REGION, 
+         sum.walnut_Production,   
+         sum.almond_Production,
+         sum.chestnut_Production,
+         sum.cashew_Production,
+         sum.pistachio_Production,
+         sum.kolanut_Production,
+         sum.hazelnut_Production,
+         sum.aniseetc_Production,
+         sum.chilleetc_Production,
+         sum.cinnamon_Production,
+         sum.ginger_Production,
+         sum.hop_Production,
+         sum.mate_Production, 
+         sum.nutmeg_Production,
+         sum.pepper_Production,
+         sum.peppermint_Production,
+         sum.pyrethrum_Production,
+         sum.ramie_Production,
+         sum.rubber_Production,
+         sum.spicenes_Production,
+         sum.sugarnes_Production,
+         sum.vanilla_Production,
+         geometry) %>% 
+  group_by(ADM1_CODE, ADM0_CODE) %>% 
+  mutate(treenut_pd= sum(sum.walnut_Production, 
+                          sum.almond_Production,
+                          sum.chestnut_Production,
+                          sum.cashew_Production,
+                          sum.pistachio_Production,
+                          sum.kolanut_Production,
+                          sum.hazelnut_Production))%>%
+  mutate(rest_pd = sum(sum.aniseetc_Production,
+                       sum.chilleetc_Production,
+                       sum.cinnamon_Production,
+                       sum.ginger_Production,
+                       sum.hop_Production,
+                       sum.mate_Production, 
+                       sum.nutmeg_Production,
+                       sum.pepper_Production,
+                       sum.peppermint_Production,
+                       sum.pyrethrum_Production,
+                       sum.ramie_Production,
+                       sum.rubber_Production,
+                       sum.spicenes_Production,
+                       sum.sugarnes_Production,
+                       sum.vanilla_Production)) %>% 
+  dplyr::select(G2008_1_ID, ADM1_CODE,ADM0_CODE, ADM1_NAME, ADM0_NAME, CONTINENT, REGION, treenut_pd, rest_pd)
+
+         
+# class(treenut_yld)
+# write.csv(treenut_yld, 'D:/Raw_dat/Raw_dat/Ramakutty/HarvestedAreaYield175Crops_Geotiff/HarvestedAreaYield175Crops_Geotiff/Treenuts', row.names=T) 
+st_write(treenut_pd, here("Input_data", "GAUL_treenut_rest_pd.shp"), driver="ESRI Shapefile")  # create to a shapefile
+treenut_pd= read_sf(here("Input_data", "GAUL_treenut_rest_pd,shp", "GAUL_treenut_rest_pd.shp"))
+
+# Harvested area - Treenuts -----------------------------------------------
+
+ex_walnut_ha = FunZone_sum(rast_walnut_ha)
+ex_almond_ha = FunZone_sum(rast_almond_ha)
+ex_chestnut_ha = FunZone_sum(rast_chestnut_ha)
+ex_cashew_ha = FunZone_sum(rast_cashew_ha)
+ex_pistachio_ha = FunZone_sum(rast_pistachio_ha)
+ex_kolanut_ha = FunZone_sum(rast_kolanut_ha)
+ex_hazelnut_ha = FunZone_sum(rast_hazelnut_ha)
+ex_brazil_ha = FunZone_sum(rast_brazil_ha)
+ex_areca_ha = FunZone_sum(rast_areca_ha)
+ex_nutnes_ha = FunZone_sum(rast_nutnes_ha)
+
+# Rest
+ex_anise_ha = FunZone_sum(rast_anise_ha)
+ex_chili_ha = FunZone_sum(rast_chili_ha)
+ex_cinnamon_ha =FunZone_sum(rast_cinnamon_ha) 
+ex_ginger_ha = FunZone_sum(rast_ginger_ha)
+ex_hop_ha = FunZone_sum(rast_hop_ha)
+ex_mate_ha = FunZone_sum(rast_mate_ha)
+ex_nutmeg_ha = FunZone_sum(rast_nutmeg_ha)
+ex_pepper_ha = FunZone_sum(rast_pepper_ha)
+ex_peppermint_ha = FunZone_sum(rast_peppermint_ha)
+ex_pyreth_ha = FunZone_sum(rast_pyreth_ha)
+ex_ramie_ha = FunZone_sum(rast_ramie_ha)
+ex_ruber_ha = FunZone_sum(rast_ruber_ha)
+ex_spices_ha = FunZone_sum(rast_spices_ha)
+ex_sugarnes_ha = FunZone_sum(rast_sugarnes_ha)
+ex_vanilla_ha = FunZone_sum(rast_vanilla_ha)
+  
+# list all treenuts
+lst.ex.ha = list(ex_walnut_ha,ex_almond_ha,ex_chestnut_ha,ex_cashew_ha,ex_pistachio_ha, ex_kolanut_ha, ex_hazelnut_ha, ex_areca_ha, ex_nutnes_ha,
+                 ex_anise_ha, ex_chili_ha, ex_cinnamon_ha, ex_ginger_ha, ex_hop_ha, ex_mate_ha, ex_nutmeg_ha, ex_pepper_ha, ex_peppermint_ha, ex_pyreth_ha, ex_ramie_ha, ex_ruber_ha, ex_spices_ha, ex_sugarnes_ha, ex_vanilla_ha)
+dat_ex.ha = do.call("cbind", lst.ex.ha) # merging all tech types
+
+# list rest
+
+treenut_ha = dat_ex.ha %>% 
+  dplyr::select(G2008_1_ID, ADM1_CODE,ADM0_CODE, ADM1_NAME, ADM0_NAME, CONTINENT, REGION,
+                sum.walnut_HarvestedAreaHectares, 
+         sum.almond_HarvestedAreaHectares,
+         sum.chestnut_HarvestedAreaHectares,
+         sum.cashew_HarvestedAreaHectares,
+         sum.pistachio_HarvestedAreaHectares,
+         sum.kolanut_HarvestedAreaHectares,
+         sum.hazelnut_HarvestedAreaHectares,
+         sum.aniseetc_HarvestedAreaHectares,
+         sum.chilleetc_HarvestedAreaHectares,
+         sum.cinnamon_HarvestedAreaHectares,
+         sum.ginger_HarvestedAreaHectares,
+         sum.hop_HarvestedAreaHectares,
+         sum.mate_HarvestedAreaHectares, 
+         sum.nutmeg_HarvestedAreaHectares,
+         sum.pepper_HarvestedAreaHectares,
+         sum.peppermint_HarvestedAreaHectares,
+         sum.pyrethrum_HarvestedAreaHectares,
+         sum.ramie_HarvestedAreaHectares,
+         sum.rubber_HarvestedAreaHectares,
+         sum.spicenes_HarvestedAreaHectares,
+         sum.sugarnes_HarvestedAreaHectares,
+         sum.vanilla_HarvestedAreaHectares,
+         geometry) %>% 
+  group_by(ADM0_CODE, ADM1_CODE) %>% 
+  mutate(treenut_ha = sum(sum.walnut_HarvestedAreaHectares, 
+         sum.almond_HarvestedAreaHectares,
+         sum.chestnut_HarvestedAreaHectares,
+         sum.cashew_HarvestedAreaHectares,
+         sum.pistachio_HarvestedAreaHectares,
+         sum.kolanut_HarvestedAreaHectares,
+         sum.hazelnut_HarvestedAreaHectares)) %>% 
+  mutate(rest_ha = sum(sum.aniseetc_HarvestedAreaHectares,
+                       sum.chilleetc_HarvestedAreaHectares,
+                       sum.cinnamon_HarvestedAreaHectares,
+                       sum.ginger_HarvestedAreaHectares,
+                       sum.hop_HarvestedAreaHectares,
+                       sum.mate_HarvestedAreaHectares, 
+                       sum.nutmeg_HarvestedAreaHectares,
+                       sum.pepper_HarvestedAreaHectares,
+                       sum.peppermint_HarvestedAreaHectares,
+                       sum.pyrethrum_HarvestedAreaHectares,
+                       sum.ramie_HarvestedAreaHectares,
+                       sum.rubber_HarvestedAreaHectares,
+                       sum.spicenes_HarvestedAreaHectares,
+                       sum.sugarnes_HarvestedAreaHectares,
+                       sum.vanilla_HarvestedAreaHectares)) %>% 
+  dplyr::select(G2008_1_ID, ADM1_CODE,ADM0_CODE, ADM1_NAME, ADM0_NAME, CONTINENT, REGION, treenut_ha, rest_ha)
+
+
+
+# Joining SPAM with GAUL L -----------------------------------------------------------
+dat_spam=read_csv(here("input_data", "spam2010V2r0_global_Y_TA.csv"))
+dat_spam=dplyr::select(dat_spam, x,y, alloc_key, name_adm1, iso3, name_cntr)
+
+# Make SPAM spatial 
+# projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
+# proj_mercator = "+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
+proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below 
+dat_cent <- st_as_sf(x = dat_spam,                         
+                     coords = c("x", "y"),
+                     crs = proj)
+
+shape_gaul=read_sf(here("Input_data", "g2008_1.shp")) #actual GAUL map 
+
+
+# LOAD treenut production and harvested area 
+treenut_ha= read_sf(here("Input_data", "GAUL_treenut_rest_ha,shp", "GAUL_treenut_rest_ha.shp"))
+treenut_pd= read_sf(here("Input_data", "GAUL_treenut_rest_pd,shp", "GAUL_treenut_rest_pd.shp"))
+# write_sf(sph.all, dsn = here("Input_data", driver = "ESRI shapefile"))
+sph.prod.tib = treenut_pd %>% 
+  as_tibble() %>% 
+  dplyr::select(G2008_1_ID, ADM1_CODE, ADM0_CODE, ADM0_NAME, treenut_pd, rest_pd,geometry)
+
+sph.harv = treenut_ha %>% 
+  as_tibble() %>% 
+  dplyr::select(G2008_1_ID, ADM1_CODE, ADM0_CODE, ADM0_NAME, treenut_ha, rest_ha,geometry)
+# loading SPAM-GADM join
+
+# Creating a spatial join between the SPAM (pixel) and GAUL 2008
+SPAM_GAUL_JOIN = dat_cent %>% 
+    st_join(., shape_gaul, join = st_within, left = T)
+st_write(SPAM_GAUL_JOIN, here("Input_data", "GAUL2008_SPAM_LINK"), driver="ESRI Shapefile")  # From here go to script 
+SPAM_GAUL_JOIN=read_sf(here("Input_data", "GAUL2008_SPAM_LINK"))
+
+# GAUL aggregation and transformation  ------------------------------------
+
+# Independent -- Merging treenuts with SPAM -------------------------------
+# LOAD data from previous step (due to memory overuse)
+setwd(here('Input_data', "Rectypes_full_cropspread"))
+yld_dat <- read_csv('Yield_full_cropspread.csv')
+prd_dat <- read_csv('Production_full_cropspread.csv')
+har_dat <- read_csv('Harv_area_spatial_spread_SPAM.csv')
+phy_dat <- read_csv('Phys_area_full_cropspread.csv')
+
+# Aggregate results to adm1 level and calculate ratio between nuts and rest
+val_pd = treenut_pd %>% 
+  group_by(ADM1_CODE, ADM0_CODE) %>% 
+  summarise(sum_pd = sum(treenut_pd, keep = T),
+            sum_rest_pd = sum(rest_pd)) 
+  write_sf(val_pd, here("Input_data", "val_pd.shp"))
+
+val_pd=read_sf(dsn=here("Input_data", "val_pd.shp"))
+val_pd=rename(val_pd,ADM1_CODE = ADM1_CO, #when loading the data run rename function
+       ADM0_CODE = ADM0_CO, 
+       sum_rest_pd = sm_rst_)
+
+
+val_ha = treenut_ha %>% 
+  group_by(ADM1_CODE, ADM0_CODE) %>% 
+  summarise(sum_ha = sum(treenut_ha, keep = T),
+            sum_rest_ha = sum(rest_ha)) #%>% 
+  write_sf(val_ha, here("Input_data", "val_ha.shp"))
+
+val_ha=read_sf(here("Input_data", "val_ha.shp"))
+val_ha = rename(val_ha,
+                ADM1_CODE = ADM1_CO,
+                ADM0_CODE = ADM0_CO,
+                sum_rest_ha = sm_rst_)
+
+
+# Join of harvested area and production 
+join_ha_pd_gaul = val_pd %>%
+  group_by(ADM1_CODE, ADM0_CODE) %>%
+  as_tibble() %>% 
+  left_join(., val_ha, by="ADM1_CODE") %>% 
+  left_join(., shape_gaul, by = "ADM1_CODE") %>% 
+  rowwise() %>% 
+  mutate(tnuts_ratio_pd = sum_pd/sum(sum_pd, sum_rest_pd),
+         tnuts_ratio_ha = sum_ha/sum(sum_ha, sum_rest_ha)) %>% 
+  dplyr::select(G2008_1_ID, ADM0_CODE, ADM0_NAME, ADM0_NAME, ADM1_NAME, CONTINENT, REGION,
+                sum_pd, sum_rest_pd, sum_ha, sum_rest_ha, tnuts_ratio_pd, tnuts_ratio_ha, geometry) #%>% 
+st_write(., here("Input_data", "GAUL_tnuts_rest_ratio_ha_pd"), driver="ESRI Shapefile")  # From here go to script 
+join_ha_pd_gaul=read_sf(here("Input_data", "GAUL_tnuts_rest_ratio_ha_pd"))
+join_ha_pd_gaul=join_ha_pd_gaul %>% as_tibble() %>%  write_csv(here("Input_data", "join_ha_pd_gaul.csv")) # strange transformation. Creates weired rations in csv file
+
+# 
+# # Production
+# # Join of harvested area and production
+# join_pd_gaul = val_pd %>%
+#   group_by(ADM1_CODE, ADM0_CODE) %>%
+#   as_tibble() %>%
+#   # left_join(., val_ha, by="ADM1_CODE") %>%
+#   left_join(., shape_gaul, by = "ADM1_CODE") %>%
+#   rowwise() %>%
+#   mutate(tnuts_ratio_pd = sum_pd/sum(sum_pd, sum_rest_pd)) %>% #,
+#          # tnuts_ratio_ha = sum_ha/sum(sum_ha, sum_rest_ha)) %>%
+#   dplyr::select(G2008_1_ID, ADM0_CODE.x, ADM0_NAME, ADM0_NAME, ADM1_NAME, CONTINENT, REGION,
+#                 sum_pd, sum_rest_pd,
+#                 # sum_ha, sum_rest_ha,
+#                 tnuts_ratio_pd,
+#                 # tnuts_ratio_ha,
+#                 geometry.y) %>%
+# st_write(., here("Input_data", "GAUL_tnuts_rest_ratio_pd"), driver="ESRI Shapefile")  # From here go to script
+# join_pd_gaul=read_sf(here("Input_data", "GAUL_tnuts_rest_ratio_ha_pd"))
+# 
+# # Harvested area
+# join_ha_gaul = val_ha %>%
+#   group_by(ADM1_CODE, ADM0_CODE) %>%
+#   as_tibble() %>%
+#   # left_join(., val_ha, by="ADM1_CODE") %>%
+#   left_join(., shape_gaul, by = "ADM1_CODE") %>%
+#   rowwise() %>%
+#   mutate(
+#   # tnuts_ratio_pd = sum_pd/sum(sum_pd, sum_rest_pd),
+#          tnuts_ratio_ha = sum_ha/sum(sum_ha, sum_rest_ha)) %>%
+#   dplyr::select(G2008_1_ID, ADM0_CODE.x, ADM0_NAME, ADM0_NAME, ADM1_NAME, CONTINENT, REGION,
+#                 # sum_pd, sum_rest_pd,
+#                 sum_ha, sum_rest_ha,
+#                 # tnuts_ratio_pd,
+#                 tnuts_ratio_ha, geometry.y)# %>%
+#   st_write(., here("Input_data", "GAUL_tnuts_rest_ratio_ha"), driver="ESRI Shapefile")  # From here go to script
+# 
+# join_ha_gaul=read_sf(here("Input_data", "GAUL_tnuts_rest_ratio_ha_ha"))
+# 
+
+
+
+# Treenuts GAUL pixel  ---------------------------------------------------------------
+# Treenuts are loaded from the treenuts script
+# Production
+# # rbenchmark::benchmark(
+# prod_dat_gaul_spam = SPAM_GAUL_JOIN %>% 
+#   as_tibble() %>% 
+#   left_join(.,sph.prod.tib, by="G2008_1_ID") %>% 
+#   left_join(.,prd_dat, by="alloc_key") #%>% 
+#   mutate(tnuts = trnt_r_*rest) %>% 
+#   mutate(rest.1 = (1-trnt_r_)*rest) %>% 
+#   dplyr::select(alloc_key, UID, GID_0.x, GID_1.x, rec_type, tech_type, unit,
+#                 whea:rest, tnuts, rest.1) %>% 
+#   write.csv(here("Input_data", "prod_tnuts_spam_gaul.csv"))
+
+
+# c=read_csv(here("Input_data", "prod_tnuts_spam_gaul.csv"))
+# 
+# # Approach validation > do the numbers make sense?
+# xy=dat_join_prod %>% 
+#   group_by(tnuts) %>%
+#   top_n(n=10) %>% 
+#   dplyr::select(rest, tnuts, rest.1)
+# 
+# write_csv(dat_join_prod, here("Input_data", "dat_treenut_prod.csv"))
+# # st_as_sf()
+
+# Production 
+# rbenchmark::benchmark(
+# treenut_pd = as_tibble(join_ha_pd_gaul) %>% 
+#   rename(G2008_1_ID=G2008_1) %>% 
+# left_join(.,as_tibble(SPAM_GAUL_JOIN), by="G2008_1_ID") %>% 
+#   right_join(.,prd_dat, by="alloc_key")%>% 
+# dplyr::select(alloc_key, G2008_1_ID, ADM1_CODE, ADM1_NAME, ADM0_CODE, ADM0_NAME,
+#               name_cntr, name_adm1.x, iso3.x, rec_type, tech_type, unit, whea:rest, tnuts_ratio_pd) %>%
+#   rename(
+#     # ADM1_NAME = ADM1_NAME.x,
+#   #        ADM0_CODE = ADM0_CODE.x,
+#   #        ADM0_NAME = ADM0_NAME.x,
+#          name_adm1 = name_adm1.x, 
+#          iso3 =iso3.x) %>%
+#   write.csv(here("Input_data", "prod_tnuts_spam_gaul1.csv"))
+
+treenut_pd = as_tibble(join_ha_pd_gaul) %>% 
+  # rename(G2008_1_ID = G2008_1) %>%
+  left_join(.,as_tibble(SPAM_GAUL_JOIN), by="G2008_1_ID") %>% 
+  right_join(., prd_dat, by="alloc_key") %>% 
+  dplyr::select(alloc_key, G2008_1_ID, ADM1_CODE, ADM1_NAME.x, ADM0_CODE.x, ADM0_NAME.x,
+                name_cntr, name_adm1.x, iso3.x, rec_type, tech_type, unit, whea:rest, tnuts_ratio_pd) %>%
+  rename(name_adm1 = name_adm1.x,
+         ADM1_NAME = ADM1_NAME.x,
+         ADM0_CODE =ADM0_CODE.x,
+         ADM0_NAME =ADM0_NAME.x,
+         iso3 =iso3.x) %>%
+  write_csv(here("Input_data", "prod_tnuts_spam_gaul1.csv"))
+treenut_pd = read.csv(here("Input_data","prod_tnuts_spam_gaul1.csv"))
+
+
+# Harvested area 
+treenut_ha = as_tibble(join_ha_pd_gaul) %>% 
+  # rename(G2008_1_ID = G2008_1) %>%
+  left_join(.,as_tibble(SPAM_GAUL_JOIN), by="G2008_1_ID") %>% 
+  right_join(.,har_dat , by="alloc_key") %>% 
+  dplyr::select(alloc_key, G2008_1_ID, ADM1_CODE, ADM1_NAME.x, ADM0_CODE.x, ADM0_NAME.x,
+                name_cntr, name_adm1.x, iso3.x, rec_type, tech_type, unit, whea:rest, tnuts_ratio_ha) %>%
+  rename(name_adm1 = name_adm1.x,
+         ADM1_NAME = ADM1_NAME.x,
+         ADM0_CODE =ADM0_CODE.x,
+         ADM0_NAME =ADM0_NAME.x,
+         iso3 =iso3.x) %>%
+  write.csv(here("Input_data", "ha_tnuts_spam_gaul1.csv"))
+treenut_ha = read_csv(here("Input_data","ha_tnuts_spam_gaul1.csv"))
+
+# physical area 
+treenut_phys = as_tibble(join_ha_pd_gaul) %>% 
+  # rename(G2008_1_ID=G2008_1) %>%
+  left_join(.,as_tibble(SPAM_GAUL_JOIN), by="G2008_1_ID") %>% 
+  right_join(.,phy_dat, by="alloc_key") %>% 
+  dplyr::select(alloc_key, G2008_1_ID, ADM1_CODE, ADM1_NAME.x, ADM0_CODE.x, ADM0_NAME.x,
+                name_cntr, name_adm1.x, iso3.x, rec_type, tech_type, unit, whea:rest, tnuts_ratio_ha) %>%
+  rename(name_adm1 = name_adm1.x,
+         ADM0_CODE =ADM0_CODE.x,
+         ADM0_NAME =ADM0_NAME.x,
+         ADM1_NAME = ADM1_NAME.x,
+         iso3 =iso3.x) %>%
+  write.csv(here("Input_data", "phys_tnuts_spam_gaul1.csv"))
+treenut_phys = read.csv(here("Input_data","phys_tnuts_spam_gaul1.csv"))
+
+
+
+# Approach: Aim is to get dinal datasets on pixel level > Adm1 level
+# 1_Cbind the three datasets 
+# 2_Gather crops 
+# 3_Spread levels rec type
+# 4_Calculate yields after unit transformation  
+
+
+# # GThis data contains all crops spread > Values correspond to the unaggregated SPAM dataset 
+dat.phys = read_csv(here("Input_data", "phys_tnuts_spam_gaul1.csv"))  # this is pixel level spam
+dat.prod = read_csv(here("Input_data", "prod_tnuts_spam_gaul1.csv"))  # this is pixel level spam
+dat.harv = read_csv(here("Input_data", "ha_tnuts_spam_gaul1.csv"))    # this is pixel level spam
+
+# calulating treenuts and rest from share ratio
+dat_prod = treenut_pd %>%  mutate(tnuts = rest*tnuts_ratio_pd,
+                                rest = rest*(1-tnuts_ratio_pd)) %>%
+  dplyr::select(-c(tnuts_ratio_pd)) %>% 
+  write.csv(here("Input_data", "prod_tnuts_rest_spam_gaul_FINAL1.csv"))
+
+dat_harv = dat.harv %>%  mutate(tnuts = rest*tnuts_ratio_ha,
+                                rest = rest*(1-tnuts_ratio_ha)) %>%
+  dplyr::select(-c(tnuts_ratio_ha))%>% 
+  write.csv(here("Input_data", "harv_tnuts_rest_spam_gaul_FINAL.csv"))
+
+dat_phys = dat.phys %>%  mutate(tnuts = rest*tnuts_ratio_ha,
+                                rest = rest*(1-tnuts_ratio_ha)) %>%
+  dplyr::select(-c(tnuts_ratio_ha)) %>%    
+  write.csv(here("Input_data", "phys_tnuts_rest_spam_gaul_FINAL.csv"))
+
+# This data contains  the final treenuts isolated from the initial rest. Rest here means now the herbs, etc
+dat_phys = read_csv(here("Input_data", "phys_tnuts_rest_spam_gaul_FINAL.csv")) #tnutsratios good here -checked
+dat_prod = read_csv(here("Input_data", "prod_tnuts_rest_spam_gaul_FINAL1.csv"))
+dat_harv = read_csv(here("Input_data", "harv_tnuts_rest_spam_gaul_FINAL.csv"))
+# head(dat.phys)
+# head(dat.prod)
+# head(dat.harv)
+
+# lst.tnuts = list(dat.harv, dat.phys, dat.prod)
+dat_nutsall <- do.call("rbind", list(dat_harv, dat_phys, dat_prod))
+
+# Calculating the yield for nuts 
+# yield_nut_dat = 
+do.call("rbind", list(dat_harv, dat_phys, dat_prod)) %>% 
+  dplyr::select(-c(X1)) %>%
+  pivot_longer(names_to = "crops", values_to = "value", cols = whea:tnuts) %>% 
+  pivot_wider(-unit, names_from = "rec_type", 
+              values_from = "value") %>% 
+  mutate(Y = (P*1000/H))%>% #checked for result. We are transforming mt to kg  when multiplying Production by 1000 to get unit kg/ha for yields
+  mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% 
+  # rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% 
+  write_csv(here("Input_data", "pxl_rec_spreadtec_gaul_spam_tnuts.csv"))
+dat.all=read_csv(here("Input_data", "pxl_rec_spread_tec_gaul_spam_tnuts.csv"))
+
+
+# SUBSETTING FOR EU-27 GAUL  ----------------------------------------------------------------------
+EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA", 
+            "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR")
+# write.csv(EU_iso3, here("Input_data", "EU27_iso3_lst.csv"))
+length(EU_iso3) #27 member states > the list comes from gams
+
+# Check if the EU countries iso3 are all present in dat.phys etc.
+# EU_iso3 %in% dat.phys$iso3 #Validation if all EU27 countries are in the dfs below
+dat.phys.EU = read_csv(here("Input_data", "phys_tnuts_rest_spam_gaul_FINAL.csv")) %>% #Pixel level data
+  filter(iso3 %in% EU_iso3) %>% 
+  mutate(alloc_key = str_sub(alloc_key, start=2)) %>% 
+  dplyr::select(-c(X1_1, X1))
+
+
+dat.prod.EU = read_csv(here("Input_data", "prod_tnuts_rest_spam_gaul_FINAL1.csv")) %>% 
+  filter(iso3 %in% EU_iso3)%>% 
+  mutate(alloc_key = str_sub(alloc_key, start=2)) %>% 
+  dplyr::select(-c(X1))
+
+
+dat.harv.EU = read_csv(here("Input_data", "harv_tnuts_rest_spam_gaul_FINAL.csv")) %>% 
+  filter(iso3 %in% EU_iso3) %>% 
+  mutate(alloc_key = str_sub(alloc_key, start=2)) %>% 
+  dplyr::select(-c(X1_1, X1))
+
+
+dat.all.EU=  do.call("rbind", list(dat.phys.EU, dat.prod.EU, dat.harv.EU)) %>% 
+  # filter(iso3 %in% EU_iso3) %>% 
+  # dplyr::select(-c(X1, X1_1)) %>%
+  pivot_longer(names_to = "crops", values_to = "value", cols = whea:tnuts) %>% 
+  pivot_wider(-unit, names_from = "rec_type", 
+              values_from = "value" ) %>% 
+  mutate(Y = (P*1000/H)) #%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here =300kg/ha) convert mt to kg Issue: Weird as according to the EU stats page aMegatonne, abbreviated as Mt, is a metric unit equivalent to 1 million (106) tonnes, or 1 billion (109) kilograms. 
+mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% 
+  # rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% 
+  write_csv(here("Input_data", "pxl_rec_tec_gaul_spam_tnuts_EU_27_corr_prod.csv")) #IMPORTANT dataset. Pixel level. Tnuts.
+dat.all.EU=read_csv(here("Input_data","pxl_rec_tec_gaul_spam_tnuts_EU_27_corr_prod.csv"))
+
+# Testing the approach
+# {
+# # cbind dfs - testing subset
+# fil.prod = dat.prod %>% top_n(n=20)
+# fil.harv = dat.harv %>% top_n(n=20)
+# fil.phys  = dat.phys %>% top_n(n=20)
+# 
+# lst.tnuts = list(fil.harv, fil.phys, fil.prod)
+# x= do.call("rbind", list(fil.harv, fil.phys, fil.prod))
+# 
+# long.dat.nuts <- do.call("rbind", list(fil.harv, fil.phys, fil.prod)) %>%
+#   dplyr::select(-c(X1)) %>%
+#   pivot_longer(names_to = "crops", values_to = "value", cols = whea:rest) %>% 
+#   pivot_wider(-unit, names_from = "rec_type", 
+#               values_from = "value" ) #%>% 
+#   mutate(Y = (P*1000/H))%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here =300kg/ha) convert mt to kg Issue: Weird as according to the EU stats page aMegatonne, abbreviated as Mt, is a metric unit equivalent to 1 million (106) tonnes, or 1 billion (109) kilograms. 
+#   mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% 
+#   rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% 
+#   write.csv(here("Input_data", "test_rbind_fil"))
+# } 
+
+# Crop name mapping -------------------------------------------------------
+# SPAM mapping 
+nam_cat <- read_csv(here("Input_data", 'crop_names.csv')) # Loading crop name sheets - mapping SPAM -- FAO (FROM SPAM)
+nam_cat <- nam_cat %>%  mutate(FAOCODE = as.double(FAOCODE)) %>% 
+  rename(No_spam = names(nam_cat)[1])
+
+GAEZ_SPAM_crp = read_csv(here("Input_data", "6-GAEZ-and-SPAM-crops-correspondence-2015-02-26.csv")) #Mapping GAEZ and SPAM (from SPAM)
+GAEZ_SPAM_crp = GAEZ_SPAM_crp %>% 
+  rename(No_spam = names(.)[4]) %>% 
+  dplyr::select('SPAM crop long name', 'GAEZ crop', No_spam) 
+
+# ORiginal Croplist Modeldat CIFOS
+FAO_CIFOS_crp = read_csv(here("Input_data", "FAO_modeldat_2020.csv"))
+
+# FAO spreaddsheets 
+FAO_stat_control = read_csv(here("Input_data","FAOSTAT_data_12-26-2020-1.csv"))
+food_cat <- read_csv(here("Input_data",'foodnonfood.csv'))# laoding food usage sheet 
+food_cat = food_cat %>% 
+  rename(`SPAM short name`=Crop_short)
+FAOcntr <- read_csv(here("Input_data",'FAO_countries.csv')) # loading FAO country codes
+FAOcntr <- dplyr::select(FAOcntr, 'ISO3', 'FAOSTAT_CODE', "GAUL_CODE", 'Continent', 'Subcontinent')
+
+
+# The incomplete mapping of SPAM, GAEZ, FAOstat, FAO_Cifos are made 
+# Not all the crops are matched perfectly. This is made by hand in excel (export-import a chunk below)
+model_dat = left_join(FAO_CIFOS_crp, FAO_stat_control, by="crop") %>%
+  left_join(nam_cat, ., by="FAOCODE") %>%
+  left_join(.,GAEZ_SPAM_crp, by= "No_spam") #%>% 
+write.csv(here("Input_data", "model_crops_incomplete.csv"))
+
+# This data frame is a complete mapping of GAEZ, SPAM and FAO (FAO original and CIFOS model dat FAO crop names)
+# The FAO crop names from the model data differ slightly between Model dat and the fAO crops that I exctracted from FAOSTAT. 
+# Crop others were adjusted according to SPAM indication and the GAEZ mapped crops (cereal others = oats, etc.)
+
+dat_crp_map <- read_csv(here("Input_data","model_crops_mapping_GAEZ_SPAM_FAO_complete.csv"))
+# dat_crp_map2 <- read_csv(here("Input_data", "model_crops_incomplete.csv")) 
+
+#Validation > Result = Complete match!
+# dat_crp_ma p2$ crop_cifos %in% FAO_CIFOS_crp$crop 
+
+dat_crp_map1 = dat_crp_map %>% 
+  rename(crop_cifos = crop) %>% 
+  left_join(.,food_cat) #adding food categories (food/non-food)
+
+dat_crp_map1$Usage[42]="food" #adding the food category to treenuts 
+write.csv(dat_crp_map1, here("Input_data", "crop_name_mapping_complete.csv")) # this file can be used for further mapping of the crops 
+
+# Aggregating to ADM1 level EU -----------------------------------------------
+# Load and transform data 
+dat.pxl.EU = read_csv(here("Input_data", "pxl_rec_tec_gaul_spam_tnuts_EU_27_corr_prod.csv"))
+dat_TA_yld = read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv")) %>% 
+  dplyr::select(alloc_key,name_cntr,name_adm1, iso3, prod_level, x, y) %>% 
+  mutate(alloc_key = str_sub(alloc_key, 2))
+
+
+# Mapping countries
+FAOcntr <- read_csv(here("Input_data", 'FAO_countries.csv'))
+FAOcntr <- dplyr::select(FAOcntr, 'ISO3', 'FAOSTAT_CODE', "GAUL_CODE", 'Continent', 'Subcontinent') %>% 
+  rename(iso3=ISO3)
+dat_EU27 = read_csv(here("Input_data", "Country_name_mapping_incomplete.csv"))%>% 
+  left_join(.,FAOcntr, by="iso3")
+
+dat_crop_nam = read_csv(here("Input_data", "crop_name_mapping_complete.csv")) %>% 
+  dplyr::select(-c(X1,X1_1)) %>% 
+  rename(crops = 'SPAM short name')
+
+# Transform pixel data so that it has SPAM variables (adm1 level) 
+
+dat.pixel.EU = dat.pxl.EU %>%
+  mutate_at(vars(alloc_key), funs(as.character)) %>%  
+  left_join(.,dat_TA_yld, by="alloc_key") %>%
+  dplyr::select(alloc_key, iso3.x, name_cntr.x,name_adm1.x, prod_level, iso3.x, G2008_1_ID, 
+                ADM0_CODE, ADM0_NAME, ADM1_CODE, ADM1_NAME, x, y, crops, tech_type) %>% 
+  rename(iso3=iso3.x,
+         name_cntr=name_cntr.x,
+         name_adm1=name_adm1.x) %>% 
+  left_join(.,dat_crop_nam, by="crops") %>% 
+  left_join(.,dat_EU27, by="iso3") %>% 
+  dplyr::select(prod_level, G2008_1_ID, tech_type, CIFOS_cntr_nam, crop_cifos, crops) 
+
+# df with prod_level and Cifos country EU
+EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA", 
+            "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR")
+
+dat_cntr_EU = dat.pxl.EU %>%
+  mutate_at(vars(alloc_key), funs(as.character)) %>%
+  left_join(.,dat_TA_yld, by="alloc_key") %>%
+  dplyr::select(prod_level, iso3.x, name_adm1.x) %>% 
+  dplyr::distinct_at(vars('prod_level', 'iso3.x', "name_adm1.x"), .keep_all = F) %>%
+  rename(iso3=iso3.x, 
+         name_adm1=name_adm1.x) %>% 
+  filter(iso3 %in% EU_iso3) %>% 
+  left_join(.,dat_EU27, by="iso3") %>% 
+  dplyr::select(prod_level, iso3, name_adm1, CIFOS_cntr_nam, Continent, Subcontinent)# %>% 
+# rename(
+# iso3=iso3.x, 
+# name_adm1=name_adm1.x)
+
+# df with crops and cifos crops
+dat_crop_nam1 = read_csv(here("Input_data", "crop_name_mapping_complete.csv")) %>% 
+  dplyr::select('SPAM short name', FAONAMES, crop_cifos) %>% 
+  rename(crops = 'SPAM short name')
+
+# Aggregating H,A,P and calculating Yields per Adm1 level 
+dat.adm1.EU = dat.pxl.EU %>%
+  mutate_at(vars(alloc_key), funs(as.character)) %>%
+  #   left_join(., dat.pxl.EU, by="ADM1_NAME") %>% 
+  # mutate_at(vars(alloc_key), funs(as.character)) %>%
+  left_join(.,dat_TA_yld, by="alloc_key") %>%
+  dplyr::select(iso3.x, name_cntr.x,  name_adm1.x, ADM0_NAME, ADM1_NAME, prod_level, tech_type, crops, A, H, P) %>%
+  group_by(prod_level, tech_type, crops) %>% 
+  summarize(across(.cols = A:P, .fns = sum))%>% #all three rec types are summed per adm1(prod_level) and crop
+  mutate(Y = (P*1000/H))%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here =300kg/ha) convert mt to kg Issue: Weird as according to the EU stats page aMegatonne, abbreviated as Mt, is a metric unit equivalent to 1 million (106) tonnes, or 1 billion (109) kilograms. 
+  mutate_at(vars(Y), ~replace(., is.nan(.), 0)) #%>% 
+
+# This is the sheet that contains the parameters that are important for the GAMS input file
+# It contains yield, harvested and physical area per country, adm1 unit and crop.
+EU27_crp_adm1 = dat.adm1.EU %>% 
+  left_join(.,dat_cntr_EU, by="prod_level") %>% 
+  filter(iso3 %in% EU_iso3) %>%
+  left_join(.,dat_crop_nam1, by="crops") %>% 
+  dplyr::select(iso3, FAONAMES, Subcontinent, Continent, CIFOS_cntr_nam, name_adm1, crop_cifos, A, H, P, Y) %>% 
+  write_csv(here("Input_data", "EU27_crp_adm1_gaul.csv")) # Final crop yield,area,etc. file. 
+
+# Final dataset!!
+EU27_crp_adm1 <- read_csv(here("Input_data", "EU27_crp_adm1_gaul.csv"))
+View(EU27_crp_adm1)
+
+length(unique(EU27_crp_adm1$iso3))
+is.na(EU27_crp_adm1)#Correct! 27 countries 
+
+
+
+
+
+
+
+
+
+
+# 
+# 
+# 
+# # OLD ---------------------------------------------------------------------
+# # Loading the files and calculate final ratio -----------------------------
+# wd2=setwd(here("Input_data"))
+# 
+# sph.prod1 = read_sf(dsn = wd2, layer = "GADM_treenut_rest_pd")
+# sph.harv1 = read_sf(dsn = wd2, layer = "GADM_treenut_rest_ha")
+# sph.all = st_join(sph.prod, sph.harv, join = st_within)
+# write_sf(sph.all, dsn = here("Input_data"), driver = "ESRI Shapefile") #shapefile is called 'Input_data'
+# 
+# join_harv = st_join(SPAM_GADM_JOIN_GIS_left, sph.harv, join = st_intersects)
+# join_harv_prod_SPAM_GADM = st_join(join_harv, sph.prod, join = st_intersects)
+# 
+# # Joining GADM with SPAM -------------------------------------------------------
+# wd=setwd(here("Input_data"))
+# 
+# # Load and check shapefile 
+# shape=read_sf(dsn = wd, layer = "gadm36") # multipolygons containing all admin levels globally
+# shape=dplyr::select(shape, 'UID', 'geometry', 'GID_0', 'GID_1')
+# # shape_fil=filter(shape, GID_0 == 'FIN')
+# 
+# # Making SPAM spatial explicit 
+# dat_spam=read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv"))
+# dat_spam=dplyr::select(dat_spam, 'x','y', 'alloc_key')
+# 
+# # Make SPAM spatial 
+# projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
+# dat_cent <- st_as_sf(x = dat_spam,                         
+#                      coords = c("x", "y"),
+#                      crs = projcrs)
+# 
+# class(dat_cent)
+# # dat_cent = filter(df, iso3=="FIN")
+# 
+# 
+# SPAM_GADM_JOIN_GIS_left = st_join(dat_cent, shape, join = st_intersects, left = T) #https://postgis.net/docs/ST_Within.html 
+# # head(SPAM_GADM_JOIN_GIS)
+# # class(SPAM_GADM_Join)
+# 
+# st_write(obj = SPAM_GADM_JOIN_GIS, dsn = here("Input_data"), driver = "ESRI Shapefile")
+# st_write(obj = SPAM_GADM_JOIN_GIS_left, dsn = here("Input_data"), driver = "ESRI Shapefile")
+# 
+# # Joining SPAM and GAUL 
+# 
+# 
diff --git a/Treenuts/TreenutsGAUL2.R b/Treenuts/TreenutsGAUL2.R
new file mode 100644
index 0000000000000000000000000000000000000000..3d7d89126a200320e083026e4489db4b1454d96d
--- /dev/null
+++ b/Treenuts/TreenutsGAUL2.R
@@ -0,0 +1,789 @@
+easypackages::packages(c("tidyverse", 'sf', 'raster','sp','dplyr','spatialEco',
+                         'exactextractr','tibble', 'here', "readr", "margrittr","tydr",
+                         "readxl", "sfheaders"))
+
+# New approach  -----------------------------------------------------------
+# Idea: 
+# 1. Load all the harvested area and yields (GeoTIFF) 
+# 2. Extract to point of the Spam cells
+# 3.       
+# 4. Then look at the proportion between Treenuts and the rest of spam crop others for each pixel
+
+# Zonal statistics  -------------------------------------------------------
+wd=setwd(here("Input_data"))
+
+# Load and check shapefile 
+shape_gadm=read_sf(dsn = wd, layer = "gadm36") # multipolygons containing all admin levels globally
+shape_gadm=dplyr::select(shape_gadm, 'UID', 'geometry', 'GID_0', 'GID_1')
+
+shape_gaul=read_sf(here("Input_data", "g2008_1.shp")) #actual GAUL map 
+shape_gaul=dplyr::select(shape_gaul, 'G2008_1_ID',  'ADM1_CODE', "ADM0_CODE", 'ADM0_NAME', 'ADM1_NAME', "CONTINENT", "REGION", "geometry")
+# class(shape_gadm)
+# shape_fil=filter(shape, GID_0 == 'FIN')
+dat_spam=read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv"))
+dat_spam=dplyr::select(dat_spam, x,y, alloc_key, name_adm1, iso3)
+
+# Make SPAM spatial 
+# projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
+# proj_mercator = "+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
+proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below 
+proj_meter = "+datum=WGS84 +units=m +no_defs"
+dat_cent <- st_as_sf(x = dat_spam,                         
+                     coords = c("x", "y"),
+                     crs = proj)
+
+
+# Load treenuts and rest files 
+{
+  # ylds - Nuts
+  rast_walnut_yld = raster(here("Input_data","walnut_YieldPerHectare.tif"))
+  {
+    
+    # class(rast_walnut_yld)
+    
+    # rast_walnut_withoutNA = rast_walnut_yld
+    # # 
+    # mean(rast_walnut_yld$walnut_YieldPerHectare)
+    # mean(rast_walnut_withoutNA$walnut_YieldPerHectare)
+    # rast_walnut_withoutNA[rast_walnut_withoutNA$walnut_YieldPerHectare == 0] <- NA
+    # check=as.raster(rast_walnut_withoutNA)
+    # 
+    # cellStats(rast_walnut_withoutNA$walnut_YieldPerHectare, mean)
+    # 
+    # cellStats(rast_walnut_withoutNA, mean)
+    # cellStats(rast_walnut_yld, mean)
+    # rast_walnut_withoutNA$walnut_YieldPerHectare
+  }
+  rast_almond_yld = raster(here("Input_data","almond_YieldPerHectare.tif"))
+  rast_chestnut_yld = raster(here("Input_data","chestnut_YieldPerHectare.tif"))
+  rast_cashew_yld = raster(here("Input_data","cashew_YieldPerHectare.tif"))
+  rast_pistachio_yld = raster(here("Input_data","pistachio_YieldPerHectare.tif"))
+  rast_kolanut_yld = raster(here("Input_data", "kolanut_YieldPerHectare.tif"))
+  rast_hazelnut_yld = raster(here("Input_data","hazelnut_YieldPerHectare.tif"))
+  rast_brazil_yld = raster(here("Input_data","brazil_YieldPerHectare.tif"))
+  rast_areca_yld = raster(here("Input_data","areca_YieldPerHectare.tif"))
+  rast_nutnes_yld  = raster(here("Input_data","nutnes_YieldPerHectare.tif"))
+  {
+    # 0's to Na's
+    # rast_almond_yldNA=rast_almond_yld[rast_almond_yld$almond_YieldPerHectare == 0] <- NA
+    
+    # rast_almond_yld[rast_almond_yld$almond_YieldPerHectare == 0] <- NA
+    
+    # cellStats(rast_walnut_withoutNA$walnut_YieldPerHectare, min)
+    # cellStats(rast_walnut_yld$walnut_YieldPerHectare, min)
+    
+    # rast_almond_yld
+  }
+  # yld - Rest
+  rast_anise_yld = raster(here("Input_data","aniseetc_YieldPerHectare.tif"))
+  rast_chili_yld = raster(here("Input_data","chilleetc_YieldPerHectare.tif"))
+  rast_cinnamon_yld = raster(here("Input_data","cinnamon_YieldPerHectare.tif"))
+  rast_ginger_yld = raster(here("Input_data",'ginger_YieldPerHectare.tif'))
+  rast_hop_yld = raster(here("Input_data",'hop_YieldPerHectare.tif'))
+  rast_mate_yld = raster(here("Input_data",'mate_YieldPerHectare.tif'))
+  rast_nutmeg_yld = raster(here("Input_data","nutmeg_YieldPerHectare.tif"))
+  rast_pepper_yld = raster(here("Input_data",'pepper_YieldPerHectare.tif'))
+  rast_peppermint_yld = raster(here("Input_data",'peppermint_YieldPerHectare.tif'))
+  rast_pyreth_yld = raster(here("Input_data",'pyrethrum_YieldPerHectare.tif'))
+  rast_ramie_yld = raster(here("Input_data",'ramie_YieldPerHectare.tif'))
+  rast_ruber_yld = raster(here("Input_data",'rubber_YieldPerHectare.tif'))
+  rast_spices_yld = raster(here("Input_data",'spicenes_YieldPerHectare.tif'))
+  rast_sugarnes_yld = raster(here("Input_data",'sugarnes_YieldPerHectare.tif'))
+  rast_vanilla_yld = raster(here("Input_data",'vanilla_YieldPerHectare.tif'))
+  
+  # Production - treenuts
+  
+  rast_walnut_pd = raster(here("Input_data","walnut_Production.tif"))
+  rast_almond_pd = raster(here("Input_data","almond_Production.tif"))
+  rast_chestnut_pd = raster(here("Input_data","chestnut_Production.tif"))
+  rast_cashew_pd = raster(here("Input_data","cashew_Production.tif"))
+  rast_pistachio_pd = raster(here("Input_data","pistachio_Production.tif"))
+  rast_kolanut_pd = raster(here("Input_data", "kolanut_Production.tif"))
+  rast_hazelnut_pd = raster(here("Input_data","hazelnut_Production.tif"))
+  rast_brazil_pd = raster(here("Input_data","brazil_Production.tif"))
+  rast_areca_pd = raster(here("Input_data","areca_Production.tif"))
+  rast_nutnes_pd  = raster(here("Input_data","nutnes_Production.tif"))
+  # Production  -rest
+  rast_anise_pd = raster(here("Input_data","aniseetc_Production.tif"))
+  rast_chili_pd = raster(here("Input_data","chilleetc_Production.tif"))
+  rast_cinnamon_pd = raster(here("Input_data","cinnamon_Production.tif"))
+  rast_ginger_pd = raster(here("Input_data",'ginger_Production.tif'))
+  rast_hop_pd = raster(here("Input_data",'hop_Production.tif'))
+  rast_mate_pd = raster(here("Input_data",'mate_Production.tif'))
+  rast_nutmeg_pd = raster(here("Input_data","nutmeg_Production.tif"))
+  rast_pepper_pd = raster(here("Input_data",'pepper_Production.tif'))
+  rast_peppermint_pd = raster(here("Input_data",'peppermint_Production.tif'))
+  rast_pyreth_pd = raster(here("Input_data",'pyrethrum_Production.tif'))
+  rast_ramie_pd = raster(here("Input_data",'ramie_Production.tif'))
+  rast_ruber_pd = raster(here("Input_data",'rubber_Production.tif'))
+  rast_spices_pd = raster(here("Input_data",'spicenes_Production.tif'))
+  rast_sugarnes_pd = raster(here("Input_data",'sugarnes_Production.tif'))
+  rast_vanilla_pd = raster(here("Input_data",'vanilla_Production.tif'))
+x=rasterToPolygons(rast_vanilla_pd, digits=12)
+x1=st_as_sf(x)
+  
+  # harvested area - treentus 
+  rast_walnut_ha = raster(here("Input_data","walnut_HarvestedAreaHectares.tif"))
+  rast_almond_ha = raster(here("Input_data","almond_HarvestedAreaHectares.tif"))
+  rast_chestnut_ha = raster(here("Input_data","chestnut_HarvestedAreaHectares.tif"))
+  rast_cashew_ha = raster(here("Input_data","cashew_HarvestedAreaHectares.tif"))
+  rast_pistachio_ha = raster(here("Input_data","pistachio_HarvestedAreaHectares.tif"))
+  rast_kolanut_ha = raster(here("Input_data", "kolanut_HarvestedAreaHectares.tif"))
+  rast_hazelnut_ha = raster(here("Input_data","hazelnut_HarvestedAreaHectares.tif"))
+  rast_brazil_ha = raster(here("Input_data","brazil_HarvestedAreaHectares.tif"))
+  rast_areca_ha = raster(here("Input_data","areca_HarvestedAreaHectares.tif"))
+  rast_nutnes_ha  = raster(here("Input_data","nutnes_HarvestedAreaHectares.tif"))
+  # HarvestedAreaHectares  -rest
+  rast_anise_ha = raster(here("Input_data","aniseetc_HarvestedAreaHectares.tif"))
+  rast_chili_ha = raster(here("Input_data","chilleetc_HarvestedAreaHectares.tif"))
+  rast_cinnamon_ha = raster(here("Input_data","cinnamon_HarvestedAreaHectares.tif"))
+  rast_ginger_ha = raster(here("Input_data",'ginger_HarvestedAreaHectares.tif'))
+  rast_hop_ha = raster(here("Input_data",'hop_HarvestedAreaHectares.tif'))
+  rast_mate_ha = raster(here("Input_data",'mate_HarvestedAreaHectares.tif'))
+  rast_nutmeg_ha = raster(here("Input_data","nutmeg_HarvestedAreaHectares.tif"))
+  rast_pepper_ha = raster(here("Input_data",'pepper_HarvestedAreaHectares.tif'))
+  rast_peppermint_ha = raster(here("Input_data",'peppermint_HarvestedAreaHectares.tif'))
+  rast_pyreth_ha = raster(here("Input_data",'pyrethrum_HarvestedAreaHectares.tif'))
+  rast_ramie_ha = raster(here("Input_data",'ramie_HarvestedAreaHectares.tif'))
+  rast_ruber_ha = raster(here("Input_data",'rubber_HarvestedAreaHectares.tif'))
+  rast_spices_ha = raster(here("Input_data",'spicenes_HarvestedAreaHectares.tif'))
+  rast_sugarnes_ha = raster(here("Input_data",'sugarnes_HarvestedAreaHectares.tif'))
+  rast_vanilla_ha = raster(here("Input_data",'vanilla_HarvestedAreaHectares.tif'))
+}
+
+# yld_list=list(rast_almond_yld, rast_cashew_yld, rast_chestnut_yld, rast_hazelnut_yld, rast_kolanut_yld, rast_pistachio_yld, rast_walnut_yld)
+# stack_yld=stack(rast_almond_yld, rast_cashew_yld, rast_chestnut_yld, rast_hazelnut_yld, rast_kolanut_yld, rast_pistachio_yld, rast_walnut_yld)
+# stack_ha=stack(rast_almond_ha, rast_cashew_ha, rast_chestnut_ha, rast_hazelnut_ha, rast_kolanut_ha, rast_pistachio_ha, rast_walnut_ha)
+# 
+# dat$mean.nfertilizer_global
+# summary(dat$mean.nfertilizer_global)
+# 
+
+# Perform zonal statistics ------------------------------------------------
+# FunZone_yld=function(rast,shape, na.rm=T){ # raster and vector
+#   c=zonal.stats(shape, rast, stats = c("Fun_nzmean")) 
+#   c$UID <- shape$UID  # assigning the identifier of the original df to the extracted df
+#   merge(shape, c, by.x = 'UID', by.y = 'UID') # merge the two files together
+# }
+# GADM
+FunZone_sum=function(rast){ # raster and vector
+  c=zonal.stats(shape, rast, stats = c("sum")) 
+  c$UID <- shape$UID  # assigning the identifier of the original df to the extracted df
+  dat <- merge(shape, c, by.x = 'UID', by.y = 'UID') # merge the two files together
+}
+
+# GAUL
+FunZone_sum=function(rast){ # raster and vector
+  c=zonal.stats(shape_gaul, rast, stats = c("sum")) 
+  c$G2008_1_ID <- shape_gaul$G2008_1_ID  # assigning the identifier of the original df to the extracted df
+  dat <- merge(shape_gaul, c, by.x = 'G2008_1_ID', by.y = 'G2008_1_ID') # merge the two files together
+}
+
+# 
+# original ----------------------------------------------------------------
+FunZone=function(rast, shape, na.rm=T){ # raster and vector
+  c=zonal.stats(shape, rast, stats = c("mean"))
+  c$UID <- shape$UID  # assigning the identifier of the original df to the extracted df
+  dat <- merge(shape, c, by.x = 'UID', by.y = 'UID') # merge the two files togethere
+}
+?read_sf
+# # test ignoring 0s --------------------------------------------------------
+# nzmean <- function(x) {
+#   zvals <- x==0
+#   if (all(zvals)) 0 else mean(x[!zvals])
+# }
+# 
+# Fun_nzmean=function(r, na.rm=F){
+#   mean(r[r!=0])
+#   }
+# 
+# FunZone_yld_0=function(rast,shape){ # raster and vector
+#   c=zonal.stats(shape, rast, stats = c("nzmean")) 
+#   c$UID <- shape$UID  # assigning the identifier of the original df to the extracted df
+#   merge(shape, c, by.x = 'UID', by.y = 'UID') # merge the two files together
+# }
+# 
+# # Test raster 
+# v2=raster::extract(rast_walnut_yld,shape_fil,fun='Fun_nzmean')
+
+
+# Yields ------------------------------------------------------------------
+# Apply zonal stats to treenuts 
+ex_walnut_yld = FunZone_yld(rast_walnut_yld,shape_fil)
+ex_walnut_yld_0 = FunZone_yld(rast_walnut_yld,shape_fil)
+
+# mean(ex_walnut_yld$mean.walnut_YieldPerHectare)
+# mean(ex_walnut_yld_0$mean.walnut_YieldPerHectare)
+# mean(ex_walnut_withNA_FIn$mean.walnut_YieldPerHectare)
+
+ex_walnut_withNA_FIn = FunZone_yld(rast_walnut_withoutNA,shape_fil)
+ex_walnut_yld$nzmean.walnut_YieldPerHectare
+# max(ex_walnut_withNA_FIn$mean.walnut_YieldPerHectare)
+
+
+ex_almond_yld = FunZone_yld(rast_almond_yld)
+ex_chestnut_yld = FunZone_yld(rast_chestnut_yld)
+ex_cashew_yld = FunZone_yld(rast_cashew_yld)
+ex_pistachio_yld = FunZone_yld(rast_pistachio_yld)
+ex_kolanut_yld = FunZone_yld(rast_kolanut_yld)
+ex_hazelnut_yld = FunZone_yld(rast_hazelnut_yld)
+ex_brazil_yld = FunZone_yld(rast_brazil_yld)
+ex_areca_yld = FunZone_yld(rast_areca_yld)
+ex_nutnes_yld = FunZone_yld(rast_nutnes_yld)
+
+# Apply zonal stats to rest 
+ex_anise_yld = FunZone_yld(rast_anise_yld)
+ex_chili_yld = FunZone_yld(rast_chili_yld)
+ex_cinnamon_yld =FunZone_yld(rast_cinnamon_yld) 
+ex_ginger_yld = FunZone_yld(rast_ginger_yld)
+ex_hop_yld = FunZone_yld(rast_hop_yld)
+ex_mate_yld = FunZone_yld(rast_mate_yld)
+ex_nutmeg_yld = FunZone_yld(rast_nutmeg_yld)
+ex_pepper_yld = FunZone_yld(rast_pepper_yld)
+ex_peppermint_yld = FunZone_yld(rast_peppermint_yld)
+ex_pyreth_yld = FunZone_yld(rast_pyreth_yld)
+ex_ramie_yld = FunZone_yld(rast_ramie_yld)
+ex_ruber_yld = FunZone_yld(rast_ruber_yld)
+ex_spices_yld = FunZone_yld(rast_spices_yld)
+ex_sugarnes_yld = FunZone_yld(rast_sugarnes_yld)
+ex_vanilla_yld = FunZone_yld(rast_vanilla_yld)
+
+# Join  columns (cbind)
+lst.ex.yld = list(ex_walnut_yld,ex_almond_yld,ex_chestnut_yld,ex_cashew_yld,ex_pistachio_yld, ex_kolanut_yld, ex_hazelnut_yld, ex_areca_yld, ex_brazil_yld, ex_nutnes_yld, 
+                  ex_anise_yld, ex_chili_yld, ex_cinnamon_yld, ex_ginger_yld, ex_hop_yld, ex_mate_yld, ex_nutmeg_yld, ex_pepper_yld, ex_peppermint_yld, ex_pyreth_yld, ex_ramie_yld, ex_ruber_yld, ex_spices_yld, ex_sugarnes_yld, ex_vanilla_yld)
+dat_ex.yld = do.call("cbind", lst.ex.yld) # merging all tech types
+# head(dat_ex.yld)
+
+treenut_yld = dat_ex.yld %>% 
+  select(UID, GID_0,GID_1,mean.walnut_YieldPerHectare, 
+         mean.almond_YieldPerHectare,
+         mean.chestnut_YieldPerHectare,
+         mean.cashew_YieldPerHectare,
+         mean.pistachio_YieldPerHectare,
+         mean.kolanut_YieldPerHectare,
+         mean.hazelnut_YieldPerHectare, 
+         mean.areca_YieldPerHectare, 
+         mean.brazil_YieldPerHectare, 
+         mean.nutnes_YieldPerHectare, 
+         geometry) %>% 
+  rowwise()%>%
+  mutate(treenut_yld= rowmean(c_across(mean.almond_YieldPerHectare:mean.areca_YieldPerHectare)))%>%
+  mutate(treenut_yld= rowmean(c_across(mean.anise_YieldPerHectare:mean.vanilla_YieldPerHectare)))
+
+
+# class(treenut_yld)
+# write.csv(treenut_yld, 'D:/Raw_dat/Raw_dat/Ramakutty/HarvestedAreaYield175Crops_Geotiff/HarvestedAreaYield175Crops_Geotiff/Treenuts', row.names=T) 
+st_write(treenut_yld, here("Input_data", "GADM_treenut.shp"), driver="ESRI Shapefile")  # create to a shapefile 
+
+
+# Production  -------------------------------------------------------------
+
+ex_walnut_pd = FunZone_sum(rast_walnut_pd)
+ex_almond_pd = FunZone_sum(rast_almond_pd)
+ex_chestnut_pd = FunZone_sum(rast_chestnut_pd)
+ex_cashew_pd = FunZone_sum(rast_cashew_pd)
+ex_pistachio_pd = FunZone_sum(rast_pistachio_pd)
+ex_kolanut_pd = FunZone_sum(rast_kolanut_pd)
+ex_hazelnut_pd = FunZone_sum(rast_hazelnut_pd)
+ex_brazil_pd = FunZone_sum(rast_brazil_pd)
+ex_areca_pd = FunZone_sum(rast_areca_pd)
+ex_nutnes_pd = FunZone_sum(rast_nutnes_pd)
+
+# Rest
+ex_anise_pd = FunZone_sum(rast_anise_pd)
+ex_chili_pd = FunZone_sum(rast_chili_pd)
+ex_cinnamon_pd =FunZone_sum(rast_cinnamon_pd) 
+ex_ginger_pd = FunZone_sum(rast_ginger_pd)
+ex_hop_pd = FunZone_sum(rast_hop_pd)
+ex_mate_pd = FunZone_sum(rast_mate_pd)
+ex_nutmeg_pd = FunZone_sum(rast_nutmeg_pd)
+ex_pepper_pd = FunZone_sum(rast_pepper_pd)
+ex_peppermint_pd = FunZone_sum(rast_peppermint_pd)
+ex_pyreth_pd = FunZone_sum(rast_pyreth_pd)
+ex_ramie_pd = FunZone_sum(rast_ramie_pd)
+ex_ruber_pd = FunZone_sum(rast_ruber_pd)
+ex_spices_pd = FunZone_sum(rast_spices_pd)
+ex_sugarnes_pd = FunZone_sum(rast_sugarnes_pd)
+ex_vanilla_pd = FunZone_sum(rast_vanilla_pd)
+
+# list all treenuts
+lst.ex.pd = list(ex_walnut_pd,ex_almond_pd,ex_chestnut_pd,ex_cashew_pd,ex_pistachio_pd, ex_kolanut_pd, ex_hazelnut_pd, ex_areca_pd, ex_nutnes_pd,
+                 ex_anise_pd, ex_chili_pd, ex_cinnamon_pd, ex_ginger_pd, ex_hop_pd, ex_mate_pd, ex_nutmeg_pd, ex_pepper_pd, ex_peppermint_pd, ex_pyreth_pd, ex_ramie_pd, ex_ruber_pd, ex_spices_pd, ex_sugarnes_pd, ex_vanilla_pd)
+dat_ex.pd = do.call("cbind", lst.ex.pd) # merging all tech types
+
+# list rest
+
+treenut_pd = dat_ex.pd %>% 
+  # dplyr::select(UID, GID_0,GID_1,sum.walnut_Production, 
+  dplyr::select(G2008_1_ID, ADM1_CODE,ADM0_CODE, ADM1_NAME,ADM0_NAME, CONTINENT, REGION, 
+                sum.walnut_Production,   
+                sum.almond_Production,
+                sum.chestnut_Production,
+                sum.cashew_Production,
+                sum.pistachio_Production,
+                sum.kolanut_Production,
+                sum.hazelnut_Production,
+                sum.aniseetc_Production,
+                sum.chilleetc_Production,
+                sum.cinnamon_Production,
+                sum.ginger_Production,
+                sum.hop_Production,
+                sum.mate_Production, 
+                sum.nutmeg_Production,
+                sum.pepper_Production,
+                sum.peppermint_Production,
+                sum.pyrethrum_Production,
+                sum.ramie_Production,
+                sum.rubber_Production,
+                sum.spicenes_Production,
+                sum.sugarnes_Production,
+                sum.vanilla_Production,
+                geometry) %>% 
+  group_by(ADM1_CODE, ADM0_CODE) %>% 
+  mutate(treenut_pd= sum(sum.walnut_Production, 
+                         sum.almond_Production,
+                         sum.chestnut_Production,
+                         sum.cashew_Production,
+                         sum.pistachio_Production,
+                         sum.kolanut_Production,
+                         sum.hazelnut_Production))%>%
+  mutate(rest_pd = sum(sum.aniseetc_Production,
+                       sum.chilleetc_Production,
+                       sum.cinnamon_Production,
+                       sum.ginger_Production,
+                       sum.hop_Production,
+                       sum.mate_Production, 
+                       sum.nutmeg_Production,
+                       sum.pepper_Production,
+                       sum.peppermint_Production,
+                       sum.pyrethrum_Production,
+                       sum.ramie_Production,
+                       sum.rubber_Production,
+                       sum.spicenes_Production,
+                       sum.sugarnes_Production,
+                       sum.vanilla_Production)) %>% 
+  dplyr::select(G2008_1_ID, ADM1_CODE,ADM0_CODE, ADM1_NAME, ADM0_NAME, CONTINENT, REGION, treenut_pd, rest_pd)
+
+
+# class(treenut_yld)
+# write.csv(treenut_yld, 'D:/Raw_dat/Raw_dat/Ramakutty/HarvestedAreaYield175Crops_Geotiff/HarvestedAreaYield175Crops_Geotiff/Treenuts', row.names=T) 
+st_write(treenut_pd, here("Input_data", "GAUL_treenut_rest_pd.shp"), driver="ESRI Shapefile")  # create to a shapefile
+treenut_pd= read_sf(here("Input_data", "GAUL_treenut_rest_pd,shp", "GAUL_treenut_rest_pd.shp"))
+
+# Harvested area - Treenuts -----------------------------------------------
+
+ex_walnut_ha = FunZone_sum(rast_walnut_ha)
+ex_almond_ha = FunZone_sum(rast_almond_ha)
+ex_chestnut_ha = FunZone_sum(rast_chestnut_ha)
+ex_cashew_ha = FunZone_sum(rast_cashew_ha)
+ex_pistachio_ha = FunZone_sum(rast_pistachio_ha)
+ex_kolanut_ha = FunZone_sum(rast_kolanut_ha)
+ex_hazelnut_ha = FunZone_sum(rast_hazelnut_ha)
+ex_brazil_ha = FunZone_sum(rast_brazil_ha)
+ex_areca_ha = FunZone_sum(rast_areca_ha)
+ex_nutnes_ha = FunZone_sum(rast_nutnes_ha)
+
+# Rest
+ex_anise_ha = FunZone_sum(rast_anise_ha)
+ex_chili_ha = FunZone_sum(rast_chili_ha)
+ex_cinnamon_ha =FunZone_sum(rast_cinnamon_ha) 
+ex_ginger_ha = FunZone_sum(rast_ginger_ha)
+ex_hop_ha = FunZone_sum(rast_hop_ha)
+ex_mate_ha = FunZone_sum(rast_mate_ha)
+ex_nutmeg_ha = FunZone_sum(rast_nutmeg_ha)
+ex_pepper_ha = FunZone_sum(rast_pepper_ha)
+ex_peppermint_ha = FunZone_sum(rast_peppermint_ha)
+ex_pyreth_ha = FunZone_sum(rast_pyreth_ha)
+ex_ramie_ha = FunZone_sum(rast_ramie_ha)
+ex_ruber_ha = FunZone_sum(rast_ruber_ha)
+ex_spices_ha = FunZone_sum(rast_spices_ha)
+ex_sugarnes_ha = FunZone_sum(rast_sugarnes_ha)
+ex_vanilla_ha = FunZone_sum(rast_vanilla_ha)
+
+# list all treenuts
+lst.ex.ha = list(ex_walnut_ha,ex_almond_ha,ex_chestnut_ha,ex_cashew_ha,ex_pistachio_ha, ex_kolanut_ha, ex_hazelnut_ha, ex_areca_ha, ex_nutnes_ha,
+                 ex_anise_ha, ex_chili_ha, ex_cinnamon_ha, ex_ginger_ha, ex_hop_ha, ex_mate_ha, ex_nutmeg_ha, ex_pepper_ha, ex_peppermint_ha, ex_pyreth_ha, ex_ramie_ha, ex_ruber_ha, ex_spices_ha, ex_sugarnes_ha, ex_vanilla_ha)
+dat_ex.ha = do.call("cbind", lst.ex.ha) # merging all tech types
+
+# list rest
+
+treenut_ha = dat_ex.ha %>% 
+  dplyr::select(G2008_1_ID, ADM1_CODE,ADM0_CODE, ADM1_NAME, ADM0_NAME, CONTINENT, REGION,
+                sum.walnut_HarvestedAreaHectares, 
+                sum.almond_HarvestedAreaHectares,
+                sum.chestnut_HarvestedAreaHectares,
+                sum.cashew_HarvestedAreaHectares,
+                sum.pistachio_HarvestedAreaHectares,
+                sum.kolanut_HarvestedAreaHectares,
+                sum.hazelnut_HarvestedAreaHectares,
+                sum.aniseetc_HarvestedAreaHectares,
+                sum.chilleetc_HarvestedAreaHectares,
+                sum.cinnamon_HarvestedAreaHectares,
+                sum.ginger_HarvestedAreaHectares,
+                sum.hop_HarvestedAreaHectares,
+                sum.mate_HarvestedAreaHectares, 
+                sum.nutmeg_HarvestedAreaHectares,
+                sum.pepper_HarvestedAreaHectares,
+                sum.peppermint_HarvestedAreaHectares,
+                sum.pyrethrum_HarvestedAreaHectares,
+                sum.ramie_HarvestedAreaHectares,
+                sum.rubber_HarvestedAreaHectares,
+                sum.spicenes_HarvestedAreaHectares,
+                sum.sugarnes_HarvestedAreaHectares,
+                sum.vanilla_HarvestedAreaHectares,
+                geometry) %>% 
+  group_by(ADM0_CODE, ADM1_CODE) %>% 
+  mutate(treenut_ha = sum(sum.walnut_HarvestedAreaHectares, 
+                          sum.almond_HarvestedAreaHectares,
+                          sum.chestnut_HarvestedAreaHectares,
+                          sum.cashew_HarvestedAreaHectares,
+                          sum.pistachio_HarvestedAreaHectares,
+                          sum.kolanut_HarvestedAreaHectares,
+                          sum.hazelnut_HarvestedAreaHectares)) %>% 
+  mutate(rest_ha = sum(sum.aniseetc_HarvestedAreaHectares,
+                       sum.chilleetc_HarvestedAreaHectares,
+                       sum.cinnamon_HarvestedAreaHectares,
+                       sum.ginger_HarvestedAreaHectares,
+                       sum.hop_HarvestedAreaHectares,
+                       sum.mate_HarvestedAreaHectares, 
+                       sum.nutmeg_HarvestedAreaHectares,
+                       sum.pepper_HarvestedAreaHectares,
+                       sum.peppermint_HarvestedAreaHectares,
+                       sum.pyrethrum_HarvestedAreaHectares,
+                       sum.ramie_HarvestedAreaHectares,
+                       sum.rubber_HarvestedAreaHectares,
+                       sum.spicenes_HarvestedAreaHectares,
+                       sum.sugarnes_HarvestedAreaHectares,
+                       sum.vanilla_HarvestedAreaHectares)) %>% 
+  dplyr::select(G2008_1_ID, ADM1_CODE,ADM0_CODE, ADM1_NAME, ADM0_NAME, CONTINENT, REGION, treenut_ha, rest_ha)
+
+
+
+# Joining SPAM with GAUL L -----------------------------------------------------------
+dat_spam=read_csv(here("input_data", "spam2010V2r0_global_Y_TA.csv"))
+dat_spam=dplyr::select(dat_spam, x,y, alloc_key, name_adm1, iso3, name_cntr)
+
+# Make SPAM spatial 
+# projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
+# proj_mercator = "+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
+proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below 
+dat_cent <- st_as_sf(x = dat_spam,                         
+                     coords = c("x", "y"),
+                     crs = proj)
+
+shape_gaul=read_sf(here("Input_data", "g2008_1.shp")) #actual GAUL map 
+
+
+# LOAD treenut production and harvested area 
+treenut_ha= read_sf(here("Input_data", "GAUL_treenut_rest_ha,shp", "GAUL_treenut_rest_ha.shp"))
+treenut_pd= read_sf(here("Input_data", "GAUL_treenut_rest_pd,shp", "GAUL_treenut_rest_pd.shp"))
+# write_sf(sph.all, dsn = here("Input_data", driver = "ESRI shapefile"))
+sph.prod.tib = treenut_pd %>% 
+  as_tibble() %>% 
+  dplyr::select(G2008_1_ID, ADM1_CODE, ADM0_CODE, ADM0_NAME, treenut_pd, rest_pd,geometry)
+
+sph.harv = treenut_ha %>% 
+  as_tibble() %>% 
+  dplyr::select(G2008_1_ID, ADM1_CODE, ADM0_CODE, ADM0_NAME, treenut_ha, rest_ha,geometry)
+# loading SPAM-GADM join
+
+# Creating a spatial join between the SPAM (pixel) and GAUL 2008
+SPAM_GAUL_JOIN = dat_cent %>% 
+  st_join(., shape_gaul, join = st_within, left = T)
+st_write(SPAM_GAUL_JOIN, here("Input_data", "GAUL2008_SPAM_LINK"), driver="ESRI Shapefile")  # From here go to script 
+SPAM_GAUL_JOIN=read_sf(here("Input_data", "GAUL2008_SPAM_LINK"))
+
+# GAUL aggregation and transformation  ------------------------------------
+
+# Independent -- Merging treenuts with SPAM -------------------------------
+# LOAD data from previous step (due to memory overuse)
+setwd(here('Input_data', "Rectypes_full_cropspread"))
+yld_dat <- read_csv('Yield_full_cropspread.csv')
+prd_dat <- read_csv('Production_full_cropspread.csv')
+har_dat <- read_csv('Harv_area_spatial_spread_SPAM.csv')
+phy_dat <- read_csv('Phys_area_full_cropspread.csv')
+
+# Aggregate results to adm1 level and calculate ratio between nuts and rest
+val_pd = treenut_pd %>% 
+  group_by(ADM1_CODE, ADM0_CODE) %>% 
+  summarise(sum_pd = sum(treenut_pd, keep = T),
+            sum_rest_pd = sum(rest_pd)) 
+write_sf(val_pd, here("Input_data", "val_pd.shp"))
+
+val_pd=read_sf(dsn=here("Input_data", "val_pd.shp"))
+val_pd=rename(val_pd,ADM1_CODE = ADM1_CO, #when loading the data run rename function
+              ADM0_CODE = ADM0_CO, 
+              sum_rest_pd = sm_rst_)
+
+
+val_ha = treenut_ha %>% 
+  group_by(ADM1_CODE, ADM0_CODE) %>% 
+  summarise(sum_ha = sum(treenut_ha, keep = T),
+            sum_rest_ha = sum(rest_ha)) #%>% 
+write_sf(val_ha, here("Input_data", "val_ha.shp"))
+
+val_ha=read_sf(here("Input_data", "val_ha.shp"))
+val_ha = rename(val_ha,
+                ADM1_CODE = ADM1_CO,
+                ADM0_CODE = ADM0_CO,
+                sum_rest_ha = sm_rst_)
+
+
+# Join of harvested area and production 
+join_ha_pd_gaul = val_pd %>%
+  group_by(ADM1_CODE, ADM0_CODE) %>%
+  as_tibble() %>% 
+  left_join(., val_ha, by="ADM1_CODE") %>% 
+  left_join(., shape_gaul, by = "ADM1_CODE") %>% 
+  rowwise() %>% 
+  mutate(tnuts_ratio_pd = sum_pd/sum(sum_pd, sum_rest_pd),
+         tnuts_ratio_ha = sum_ha/sum(sum_ha, sum_rest_ha)) %>% 
+  dplyr::select(G2008_1_ID, ADM0_CODE, ADM0_NAME, ADM0_NAME, ADM1_NAME, CONTINENT, REGION,
+                sum_pd, sum_rest_pd, sum_ha, sum_rest_ha, tnuts_ratio_pd, tnuts_ratio_ha, geometry) #%>% 
+st_write(., here("Input_data", "GAUL_tnuts_rest_ratio_ha_pd"), driver="ESRI Shapefile")  # From here go to script 
+join_ha_pd_gaul=read_sf(here("Input_data", "GAUL_tnuts_rest_ratio_ha_pd"))
+join_ha_pd_gaul=join_ha_pd_gaul %>% as_tibble() %>%  write_csv(here("Input_data", "join_ha_pd_gaul.csv")) # strange transformation. Creates weired rations in csv file
+
+# 
+# # Production
+# # Join of harvested area and production
+# join_pd_gaul = val_pd %>%
+#   group_by(ADM1_CODE, ADM0_CODE) %>%
+#   as_tibble() %>%
+#   # left_join(., val_ha, by="ADM1_CODE") %>%
+#   left_join(., shape_gaul, by = "ADM1_CODE") %>%
+#   rowwise() %>%
+#   mutate(tnuts_ratio_pd = sum_pd/sum(sum_pd, sum_rest_pd)) %>% #,
+#          # tnuts_ratio_ha = sum_ha/sum(sum_ha, sum_rest_ha)) %>%
+#   dplyr::select(G2008_1_ID, ADM0_CODE.x, ADM0_NAME, ADM0_NAME, ADM1_NAME, CONTINENT, REGION,
+#                 sum_pd, sum_rest_pd,
+#                 # sum_ha, sum_rest_ha,
+#                 tnuts_ratio_pd,
+#                 # tnuts_ratio_ha,
+#                 geometry.y) %>%
+# st_write(., here("Input_data", "GAUL_tnuts_rest_ratio_pd"), driver="ESRI Shapefile")  # From here go to script
+# join_pd_gaul=read_sf(here("Input_data", "GAUL_tnuts_rest_ratio_ha_pd"))
+# 
+# # Harvested area
+# join_ha_gaul = val_ha %>%
+#   group_by(ADM1_CODE, ADM0_CODE) %>%
+#   as_tibble() %>%
+#   # left_join(., val_ha, by="ADM1_CODE") %>%
+#   left_join(., shape_gaul, by = "ADM1_CODE") %>%
+#   rowwise() %>%
+#   mutate(
+#   # tnuts_ratio_pd = sum_pd/sum(sum_pd, sum_rest_pd),
+#          tnuts_ratio_ha = sum_ha/sum(sum_ha, sum_rest_ha)) %>%
+#   dplyr::select(G2008_1_ID, ADM0_CODE.x, ADM0_NAME, ADM0_NAME, ADM1_NAME, CONTINENT, REGION,
+#                 # sum_pd, sum_rest_pd,
+#                 sum_ha, sum_rest_ha,
+#                 # tnuts_ratio_pd,
+#                 tnuts_ratio_ha, geometry.y)# %>%
+#   st_write(., here("Input_data", "GAUL_tnuts_rest_ratio_ha"), driver="ESRI Shapefile")  # From here go to script
+# 
+# join_ha_gaul=read_sf(here("Input_data", "GAUL_tnuts_rest_ratio_ha_ha"))
+# 
+
+
+
+# Treenuts GAUL pixel  ---------------------------------------------------------------
+# Treenuts are loaded from the treenuts script
+# Production
+# # rbenchmark::benchmark(
+# prod_dat_gaul_spam = SPAM_GAUL_JOIN %>% 
+#   as_tibble() %>% 
+#   left_join(.,sph.prod.tib, by="G2008_1_ID") %>% 
+#   left_join(.,prd_dat, by="alloc_key") #%>% 
+#   mutate(tnuts = trnt_r_*rest) %>% 
+#   mutate(rest.1 = (1-trnt_r_)*rest) %>% 
+#   dplyr::select(alloc_key, UID, GID_0.x, GID_1.x, rec_type, tech_type, unit,
+#                 whea:rest, tnuts, rest.1) %>% 
+#   write.csv(here("Input_data", "prod_tnuts_spam_gaul.csv"))
+
+
+# c=read_csv(here("Input_data", "prod_tnuts_spam_gaul.csv"))
+# 
+# # Approach validation > do the numbers make sense?
+# xy=dat_join_prod %>% 
+#   group_by(tnuts) %>%
+#   top_n(n=10) %>% 
+#   dplyr::select(rest, tnuts, rest.1)
+# 
+# write_csv(dat_join_prod, here("Input_data", "dat_treenut_prod.csv"))
+# # st_as_sf()
+
+# Production 
+# rbenchmark::benchmark(
+# treenut_pd = as_tibble(join_ha_pd_gaul) %>% 
+#   rename(G2008_1_ID=G2008_1) %>% 
+# left_join(.,as_tibble(SPAM_GAUL_JOIN), by="G2008_1_ID") %>% 
+#   right_join(.,prd_dat, by="alloc_key")%>% 
+# dplyr::select(alloc_key, G2008_1_ID, ADM1_CODE, ADM1_NAME, ADM0_CODE, ADM0_NAME,
+#               name_cntr, name_adm1.x, iso3.x, rec_type, tech_type, unit, whea:rest, tnuts_ratio_pd) %>%
+#   rename(
+#     # ADM1_NAME = ADM1_NAME.x,
+#   #        ADM0_CODE = ADM0_CODE.x,
+#   #        ADM0_NAME = ADM0_NAME.x,
+#          name_adm1 = name_adm1.x, 
+#          iso3 =iso3.x) %>%
+#   write.csv(here("Input_data", "prod_tnuts_spam_gaul1.csv"))
+
+treenut_pd = as_tibble(join_ha_pd_gaul) %>% 
+  # rename(G2008_1_ID = G2008_1) %>%
+  left_join(.,as_tibble(SPAM_GAUL_JOIN), by="G2008_1_ID") %>% 
+  right_join(., prd_dat, by="alloc_key") %>% 
+  dplyr::select(alloc_key, G2008_1_ID, ADM1_CODE, ADM1_NAME.x, ADM0_CODE.x, ADM0_NAME.x,
+                name_cntr, name_adm1.x, iso3.x, rec_type, tech_type, unit, whea:rest, tnuts_ratio_pd) %>%
+  rename(name_adm1 = name_adm1.x,
+         ADM1_NAME = ADM1_NAME.x,
+         ADM0_CODE =ADM0_CODE.x,
+         ADM0_NAME =ADM0_NAME.x,
+         iso3 =iso3.x) %>%
+  write_csv(here("Input_data", "prod_tnuts_spam_gaul1.csv"))
+treenut_pd = read.csv(here("Input_data","prod_tnuts_spam_gaul1.csv"))
+
+
+# Harvested area 
+treenut_ha = as_tibble(join_ha_pd_gaul) %>% 
+  # rename(G2008_1_ID = G2008_1) %>%
+  left_join(.,as_tibble(SPAM_GAUL_JOIN), by="G2008_1_ID") %>% 
+  right_join(.,har_dat , by="alloc_key") %>% 
+  dplyr::select(alloc_key, G2008_1_ID, ADM1_CODE, ADM1_NAME.x, ADM0_CODE.x, ADM0_NAME.x,
+                name_cntr, name_adm1.x, iso3.x, rec_type, tech_type, unit, whea:rest, tnuts_ratio_ha) %>%
+  rename(name_adm1 = name_adm1.x,
+         ADM1_NAME = ADM1_NAME.x,
+         ADM0_CODE =ADM0_CODE.x,
+         ADM0_NAME =ADM0_NAME.x,
+         iso3 =iso3.x) %>%
+  write.csv(here("Input_data", "ha_tnuts_spam_gaul1.csv"))
+treenut_ha = read_csv(here("Input_data","ha_tnuts_spam_gaul1.csv"))
+
+# physical area 
+treenut_phys = as_tibble(join_ha_pd_gaul) %>% 
+  # rename(G2008_1_ID=G2008_1) %>%
+  left_join(.,as_tibble(SPAM_GAUL_JOIN), by="G2008_1_ID") %>% 
+  right_join(.,phy_dat, by="alloc_key") %>% 
+  dplyr::select(alloc_key, G2008_1_ID, ADM1_CODE, ADM1_NAME.x, ADM0_CODE.x, ADM0_NAME.x,
+                name_cntr, name_adm1.x, iso3.x, rec_type, tech_type, unit, whea:rest, tnuts_ratio_ha) %>%
+  rename(name_adm1 = name_adm1.x,
+         ADM0_CODE =ADM0_CODE.x,
+         ADM0_NAME =ADM0_NAME.x,
+         ADM1_NAME = ADM1_NAME.x,
+         iso3 =iso3.x) %>%
+  write.csv(here("Input_data", "phys_tnuts_spam_gaul1.csv"))
+treenut_phys = read.csv(here("Input_data","phys_tnuts_spam_gaul1.csv"))
+
+
+
+# Approach: Aim is to get dinal datasets on pixel level > Adm1 level
+# 1_Cbind the three datasets 
+# 2_Gather crops 
+# 3_Spread levels rec type
+# 4_Calculate yields after unit transformation  
+
+
+# # GThis data contains all crops spread > Values correspond to the unaggregated SPAM dataset 
+dat.phys = read_csv(here("Input_data", "phys_tnuts_spam_gaul1.csv"))  # this is pixel level spam
+dat.prod = read_csv(here("Input_data", "prod_tnuts_spam_gaul1.csv"))  # this is pixel level spam
+dat.harv = read_csv(here("Input_data", "ha_tnuts_spam_gaul1.csv"))    # this is pixel level spam
+
+# calulating treenuts and rest from share ratio
+dat_prod = treenut_pd %>%  mutate(tnuts = rest*tnuts_ratio_pd,
+                                  rest = rest*(1-tnuts_ratio_pd)) %>%
+  dplyr::select(-c(tnuts_ratio_pd)) %>% 
+  write.csv(here("Input_data", "prod_tnuts_rest_spam_gaul_FINAL1.csv"))
+
+dat_harv = dat.harv %>%  mutate(tnuts = rest*tnuts_ratio_ha,
+                                rest = rest*(1-tnuts_ratio_ha)) %>%
+  dplyr::select(-c(tnuts_ratio_ha))%>% 
+  write.csv(here("Input_data", "harv_tnuts_rest_spam_gaul_FINAL.csv"))
+
+dat_phys = dat.phys %>%  mutate(tnuts = rest*tnuts_ratio_ha,
+                                rest = rest*(1-tnuts_ratio_ha)) %>%
+  dplyr::select(-c(tnuts_ratio_ha)) %>%    
+  write.csv(here("Input_data", "phys_tnuts_rest_spam_gaul_FINAL.csv"))
+
+# This data contains  the final treenuts isolated from the initial rest. Rest here means now the herbs, etc
+dat_phys = read_csv(here("Input_data", "phys_tnuts_rest_spam_gaul_FINAL.csv")) #tnutsratios good here -checked
+dat_prod = read_csv(here("Input_data", "prod_tnuts_rest_spam_gaul_FINAL1.csv"))
+dat_harv = read_csv(here("Input_data", "harv_tnuts_rest_spam_gaul_FINAL.csv"))
+# head(dat.phys)
+# head(dat.prod)
+# head(dat.harv)
+
+# lst.tnuts = list(dat.harv, dat.phys, dat.prod)
+dat_nutsall <- do.call("rbind", list(dat_harv, dat_phys, dat_prod))
+
+# Calculating the yield for nuts 
+# yield_nut_dat = 
+do.call("rbind", list(dat_harv, dat_phys, dat_prod)) %>% 
+  dplyr::select(-c(X1)) %>%
+  pivot_longer(names_to = "crops", values_to = "value", cols = whea:tnuts) %>% 
+  pivot_wider(-unit, names_from = "rec_type", 
+              values_from = "value") %>% 
+  mutate(Y = (P*1000/H))%>% #checked for result. We are transforming mt to kg  when multiplying Production by 1000 to get unit kg/ha for yields
+  mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% 
+  # rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% 
+  write_csv(here("Input_data", "pxl_rec_spreadtec_gaul_spam_tnuts.csv"))
+dat.all=read_csv(here("Input_data", "pxl_rec_spread_tec_gaul_spam_tnuts.csv"))
+
+
+
+
+
+
+
+
+
+
+
+
+
+# 
+# 
+# 
+# # OLD ---------------------------------------------------------------------
+# # Loading the files and calculate final ratio -----------------------------
+# wd2=setwd(here("Input_data"))
+# 
+# sph.prod1 = read_sf(dsn = wd2, layer = "GADM_treenut_rest_pd")
+# sph.harv1 = read_sf(dsn = wd2, layer = "GADM_treenut_rest_ha")
+# sph.all = st_join(sph.prod, sph.harv, join = st_within)
+# write_sf(sph.all, dsn = here("Input_data"), driver = "ESRI Shapefile") #shapefile is called 'Input_data'
+# 
+# join_harv = st_join(SPAM_GADM_JOIN_GIS_left, sph.harv, join = st_intersects)
+# join_harv_prod_SPAM_GADM = st_join(join_harv, sph.prod, join = st_intersects)
+# 
+# # Joining GADM with SPAM -------------------------------------------------------
+# wd=setwd(here("Input_data"))
+# 
+# # Load and check shapefile 
+# shape=read_sf(dsn = wd, layer = "gadm36") # multipolygons containing all admin levels globally
+# shape=dplyr::select(shape, 'UID', 'geometry', 'GID_0', 'GID_1')
+# # shape_fil=filter(shape, GID_0 == 'FIN')
+# 
+# # Making SPAM spatial explicit 
+# dat_spam=read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv"))
+# dat_spam=dplyr::select(dat_spam, 'x','y', 'alloc_key')
+# 
+# # Make SPAM spatial 
+# projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
+# dat_cent <- st_as_sf(x = dat_spam,                         
+#                      coords = c("x", "y"),
+#                      crs = projcrs)
+# 
+# class(dat_cent)
+# # dat_cent = filter(df, iso3=="FIN")
+# 
+# 
+# SPAM_GADM_JOIN_GIS_left = st_join(dat_cent, shape, join = st_intersects, left = T) #https://postgis.net/docs/ST_Within.html 
+# # head(SPAM_GADM_JOIN_GIS)
+# # class(SPAM_GADM_Join)
+# 
+# st_write(obj = SPAM_GADM_JOIN_GIS, dsn = here("Input_data"), driver = "ESRI Shapefile")
+# st_write(obj = SPAM_GADM_JOIN_GIS_left, dsn = here("Input_data"), driver = "ESRI Shapefile")
+# 
+# # Joining SPAM and GAUL 
+# 
+# 
+
diff --git a/Veggie_isolation/veggies.R b/Veggie_isolation/veggies.R
new file mode 100644
index 0000000000000000000000000000000000000000..904cb78263026e78371deac31282c3678f01ec62
--- /dev/null
+++ b/Veggie_isolation/veggies.R
@@ -0,0 +1,274 @@
+easypackages::packages(c("tidyverse", 'sf', 'raster','sp','dplyr','spatialEco',
+                         'exactextractr','tibble', 'here', "readr", "margrittr","tydr",
+                         "readxl", "foreign", "data.table", "spData","spDataLarge", "purrr", "rgdal",
+                         "ggplot2", "RColorBrewer", "ggplotify", "treemap", "treemapify"))
+# Load data
+v1= read_csv(here("Input_data", "FAOSTAT_data_9-28-2020_Yields_full.csv")) %>% 
+  rename(crop_code = "Item Code")
+
+spm=read_csv(here("Input_data", "SPAM_crops.csv"))
+
+# Getting the FAO CODES of the veggies aggregated in SPAM as "Vegetables"
+veg_FAO = c(358:463)
+length(veg_FAO) #106 veggies are aggregated in SPAM, of which only 29 are represented here. 
+v2=v1 %>% filter(crop_code %in% veg_FAO) %>% write_csv(here("Input_data", "veggies.csv"))
+
+A = read_csv(here("Input_data", "veggies3.csv"))
+n1 = read_csv(here("Input_data", "Nutri_cifos.csv"),skip=2) 
+
+# deleting rows
+n1.3= filter(n1, !(Family %in% c("Fish", "Meat", "Egg", "Dairy", "Sugar", "Nut", "Oil", "Grain", "Other", "Fruit")))
+
+# Calulatin the percentage for each of the nutrients 
+n2.1=n1.3 %>%
+  # drop_na() %>% 
+  dplyr::select(-c(Weight:Energy)) %>% 
+  mutate_at(vars(Protein:N),
+            .funs = list(perc = ~ (.)/sum(.)*100)) #validated that all percentage columns add up to 100
+# %>% write_csv(here("Input_data", "perc.csv")) 
+
+n2 <- n2.1 %>% dplyr::select(-c(Protein:N)) %>% 
+  # gather(n2.1, "content", "value_absolute", "Weight":"N") %>% 
+  gather(., "content_perc", "value_perc", "Protein_perc":"N_perc") %>% 
+  dplyr::select(-c("Raw NEVO", "Cooked NEVO")) %>%
+  rename(usage = X3) %>% 
+  right_join(.,v3, by = c("Product"= "CIFOS_nam"))
+
+vec_nutr = pull(n2, var=content_perc) %>% 
+  unique() %>% 
+  na.omit()
+
+dat_relationship_SNP$Triad_summary_geno
+
+# Creating vectors with subsets per group
+macro = vec_nutr[c(4,5,7)]
+micro = vec_nutr[c(8:length(vec_nutr))]
+fibr  = vec_nutr[6]
+water = vec_nutr[2]
+weight = vec_nutr[1]
+Energy = vec_nutr[3] 
+
+# adding factors
+n3=n2 %>% 
+  mutate(nutri_type = case_when(
+    content_perc %in% macro ~ "Macro_nutrients",
+    content_perc %in% micro ~ "Micro_nutrients",
+    content_perc %in% weight ~ "Weight",
+    content_perc %in% Energy ~ "Energy",
+    content_perc %in% water ~ "Water",
+    content_perc %in% fibr ~ "Fiber"))
+
+n4 = n3 %>% 
+  filter(content_perc != "Water_perc") %>% 
+  filter(content_perc != "Weight_perc") %>% 
+  filter(content_perc != "NA") %>% 
+  filter(content_perc != "Energy_perc") %>% 
+  filter(content_perc != "B12_perc") %>% 
+  filter(content_perc != "Cholesterol_perc") %>% 
+  filter(content_perc != "D_perc") %>% 
+  filter(content_perc != "DHA_perc") %>% 
+  filter(content_perc != "EPA_perc") %>% 
+  filter(nutri_type != "Water") %>% 
+  filter(nutri_type != "Weight") %>%
+  # filter(content_perc != "NA") %>% 
+  filter(nutri_type != "Energy") %>% 
+  filter(nutri_type != "Fiber")
+
+
+
+# Data
+data <- n4 %>% dplyr::select(value_perc, content_perc, veg_type, nutri_type) 
+
+
+  
+a4.4 = an4 %>% group_by(veg_type) %>% 
+  drop_na() %>%
+  mutate(value_perc = (value_absolute)/sum(value_absolute)*100) #%>% 
+  # write_csv(here("Input_data", "check4.csv"))
+
+# Calculates mean, sd, se and IC
+n4.5 <- an4 %>%
+  group_by(nutri_type, veg_type) %>% #content and nutri_type are dependent
+  summarise( 
+    n=n(),
+    # mean_perc=mean(value_perc),
+    # sd_perc=sd(value_perc),
+  mean=mean(value_absolute),
+sd=sd(value_absolute)) %>%
+  # mutate(se=sd/sqrt(n)) %>%
+  # mutate(ic=se * qt((1-0.05)/2 + .5, n-1)) %>% 
+  # group_by(content, veg_type) %>%
+  right_join(., a4.4, by = "veg_type") %>% 
+  # dplyr::rename(veg_type = veg_type.x) %>%
+    drop_na()
+write_csv(n4.5, here("Input_data", "check5.csv"))
+
+# n5 = n4 %>% 
+#   dplyr::select(-c("content", value_absolute))
+
+# all values mixed
+ggplot(a4.4, aes(x=veg_type, y=value_perc, fill=content))+
+  geom_bar(stat="identity", position= "dodge", alpha=0.7)+
+  facet_wrap(~nutri_type, scales = "free_y")
+
+# facet
+ggplot(n4, aes(x=veg_type, y=value, fill=content))+
+  geom_bar(stat="identity", position= "dodge", alpha=0.7) +
+  facet_wrap(~nutri_type, scales = "free_y")+
+  theme(axis.text.x=element_text(angle = -90, hjust = 0))
+  
+ggplot(n5, aes(x=veg_type, y=value_perc, fill=content_perc))+
+  geom_bar(stat="identity", position= "dodge", alpha=0.7) +
+  facet_wrap(~content_perc, scales = "free_y")+
+  theme(axis.text.x=element_text(angle = -90, hjust = 0))
+
+# nutrients stacked
+ggplot(a4.4, aes(x=veg_type, y=value_perc, fill=content))+
+  geom_bar(stat="identity", position= "dodge", alpha=0.7, show.legend = F) +
+  # geom_errorbar(data=n4.1, aes(x=veg_type, ymin=mean-sd , ymax=mean+sd), width=0.4, colour="black", alpha=0.9, size=1.3)+
+  facet_wrap(~content, scales = "free_y")+
+  theme(axis.text.x=element_text(angle = -90, hjust = 0))
+
+# Nutrients averaged
+ggplot(n4.5, aes(x=veg_type.x, y=value_absolute, fill=content))+
+  # geom_bar(stat="identity", position= "dodge", alpha=0.7, show.legend = F) +
+geom_bar(position = "dodge", stat = "summary", fun = "mean", alpha=0.7, show.legend = F)+
+  geom_errorbar(aes(x=veg_type.x, ymin=mean-sd , ymax=mean+sd), width=0.4, colour="black", alpha=0.9, size=1.3)+
+  facet_wrap(~content, scales = "free_y")+
+  theme(axis.text.x=element_text(angle = -90, hjust = 0))
+
+
+# Perc --------------------------------------------------------------------
+
+# avergae contribution
+ggplot(an4, aes(x=veg_type, y=value_absolute, fill=content))+
+  # geom_bar(stat="identity", position= "dodge", alpha=0.7, show.legend = F) +
+  geom_bar(position = "dodge", stat = "summary", fun = "mean", alpha=0.7, show.legend = F)+
+  geom_errorbar(data=n4.5, aes(x=veg_type, ymin=mean-sd , ymax=mean+sd), width=0.4, colour="black", alpha=0.9, size=1.3)+
+  facet_wrap(~content, scales = "free_y")+
+  theme(axis.text.x=element_text(angle = -90, hjust = 0))
+
+
+
+# Analyse by tables -------------------------------------------------------
+n4 %>% group_by(nutri_type, value_perc)
+
+write_csv(n4, here("Input_data", "check2.csv"))
+                
+
+
+
+# absolute --------------------------------------------------------------
+# deleting rows
+an1.3= filter(n1, !(Family %in% c("Fish", "Meat", "Egg", "Dairy", "Sugar", "Nut", "Oil", "Grain", "Other", "Fruit")))
+# 
+# # Calulatin the percentage for each of the nutrients 
+# an2.1=an1.3 %>%
+#   # drop_na() %>% 
+#   mutate_at(vars(Weight:N),
+#             .funs = list(perc = ~ (.)/sum(.)*100)) #validated that all percentage columns add up to 100
+# # %>% write_csv(here("Input_data", "perc.csv")) 
+
+an2 <- an1.3  %>% 
+  gather("content", "value_absolute", "Weight":"N") %>%
+  # gather(., "content_perc", "value_perc", "Weight_perc":"N_perc") %>% 
+  dplyr::select(-c("Raw NEVO", "Cooked NEVO")) %>%
+  rename(usage = X3) %>% 
+  right_join(.,v3, by = c("Product"= "CIFOS_nam"))
+
+vec_nutra = pull(an2, var=content) %>% 
+  unique() %>% 
+  na.omit()
+
+# Creating vectors with subsets per group
+macro = vec_nutra[c(4,5,7)]
+micro = vec_nutra[c(8:length(vec_nutr))]
+fibr  = vec_nutra[6]
+water = vec_nutra[2]
+weight = vec_nutra[1]
+Energy = vec_nutra[3] 
+
+# adding factors
+an3=an2 %>% 
+  mutate(nutri_type = case_when(
+    content %in% macro ~ "Macro_nutrients",
+    content %in% micro ~ "Micro_nutrients",
+    content %in% weight ~ "Weight",
+    content %in% Energy ~ "Energy",
+    content %in% water ~ "Water",
+    content %in% fibr ~ "Fiber"))
+
+an4 = an3 %>% 
+  filter(content != "Water") %>% 
+  filter(content != "Weight") %>% 
+  filter(content != "NA") %>% 
+  filter(content != "Energy") %>% 
+  filter(content != "B12") %>% 
+  filter(content != "Cholesterol") %>% 
+  filter(content != "D") %>% 
+  filter(content != "DHA") %>% 
+  filter(content != "EPA") #%>% 
+  # drop_na()
+
+
+# Data
+data <- an4 %>% dplyr::select(value_absolute, content, veg_type, nutri_type) 
+
+# Calculates mean, sd, se and IC
+an4.1 <- an4 %>%
+  group_by(content, veg_type) %>%
+  summarise( 
+    n=n(),
+    mean=mean(value_absolute),
+    sum=sum(value_absolute),
+    sd=sd(value_absolute)) %>%
+  mutate(se=sd/sqrt(n))  %>%
+  mutate(ic=se * qt((1-0.05)/2 + .5, n-1)) %>% 
+  # group_by(content, veg_type) %>%
+  right_join(., an4, by = "content") %>% 
+  dplyr::rename(veg_type = veg_type.x)
+
+
+
+
+# n5 = n4 %>% 
+#   dplyr::select(-c("content", value_absolute))
+
+# all values mixed
+ggplot(an4.1, aes(x=nutri_type, y=value_absolute, fill=veg_type))+
+  geom_bar(stat="identity", position= "dodge", alpha=0.7)
+
+# facet
+ggplot(n4, aes(x=veg_type, y=value, fill=content))+
+  geom_bar(stat="identity", position= "dodge", alpha=0.7) +
+  facet_wrap(~nutri_type, scales = "free_y")+
+  theme(axis.text.x=element_text(angle = -90, hjust = 0))
+
+ggplot(n5, aes(x=veg_type, y=value_perc, fill=content_perc))+
+  geom_bar(stat="identity", position= "dodge", alpha=0.7) +
+  facet_wrap(~content_perc, scales = "free_y")+
+  theme(axis.text.x=element_text(angle = -90, hjust = 0))
+
+# nutrients stacked
+ggplot(an4, aes(x=veg_type, y=mean, fill=content))+
+  geom_bar(stat="identity", position= "stack", alpha=0.7, show.legend = F) +
+  geom_errorbar(data=an4.1, aes(x=veg_type, ymin=mean-sd , ymax=mean+sd), width=0.4, colour="black", alpha=0.9, size=1.3)+
+  facet_wrap(~content, scales = "free_y")+
+  theme(axis.text.x=element_text(angle = -90, hjust = 0))
+
+# Nutrients averaged
+ggplot(an4, aes(x=veg_type, y=value_absolute, fill=veg_type))+  
+  geom_bar(position = "dodge", stat = "summary", fun = "mean", alpha=0.7, show.legend = F)+
+  # geom_errorbar(data=an4.1, aes(x=veg_type, ymin=mean-sd , ymax=mean+sd), width=0.4, colour="black", alpha=0.9, size=1.3)+
+  theme(axis.text.x=element_text(angle = -90, hjust = 0))#+
+  # facet_wrap(~content, scales = "free_y")
+  
+
+
+# Analyse by tables -------------------------------------------------------
+n4 %>% group_by(nutri_type, value_perc)
+
+write_csv(n4, here("Input_data", "check2.csv"))
+
+
+          
diff --git a/Zones/Admin1_SpatialJoin.R b/Zones/Admin1_SpatialJoin.R
new file mode 100644
index 0000000000000000000000000000000000000000..769afc6958919d21b0122ebea38264662c12f4ef
--- /dev/null
+++ b/Zones/Admin1_SpatialJoin.R
@@ -0,0 +1,122 @@
+easypackages::packages(c("tidyverse", 'sf', 'raster','sp','dplyr','spatialEco',
+                         'exactextractr','tibble', 'here', "readr",
+                         "readxl", "foreign", "data.table", "rbenchmark", "benchmarkme",
+                         "validate", "plyr"))
+
+# Set working directory
+setwd(here::here("Input_data"))
+
+# Load data ----------------------------------------------------------------
+# Yields ------------------------------------------------------------------
+dat_TA_yld <- read_csv("spam2010V2r0_global_Y_TA.csv")# Yields - all technologies together, ie complete crop - Data downloaded on 1. of September 2020
+dat_TI_yld <- read_csv("spam2010V2r0_global_Y_TI.csv")# Yields - Iirrigated portion of crop
+dat_TH_yld <- read_csv("spam2010V2r0_global_Y_TH.csv")# Yields - rainfed high inputs portion of crop
+dat_TL_yld <- read_csv("spam2010V2r0_global_Y_TL.csv")# Yields - rainfed low inputs portion of crop
+dat_TS_yld <- read_csv("spam2010V2r0_global_Y_TS.csv")# Yields - rainfed subsistence portion of crop
+dat_TR_yld <- read_csv("spam2010V2r0_global_Y_TR.csv")# Yields - rainfed portion of crop (= TA - TI, or TH + TL + TS)
+
+# Production
+dat_TA_pr <- read_csv("spam2010V2r0_global_P_TA.csv")
+dat_TI_pr <- read_csv("spam2010V2r0_global_P_TI.csv")
+dat_TH_pr <- read_csv("spam2010V2r0_global_P_TH.csv")
+dat_TL_pr <- read_csv("spam2010V2r0_global_P_TL.csv")
+dat_TS_pr <- read_csv("spam2010V2r0_global_P_TS.csv")
+dat_TR_pr <- read_csv("spam2010V2r0_global_P_TR.csv")
+
+# Physical area--------------------------------
+dat_TA_pa <- read_csv("spam2010V2r0_global_A_TA.csv")
+dat_TI_pa <- read_csv("spam2010V2r0_global_A_TI.csv")
+dat_TH_pa <- read_csv("spam2010V2r0_global_A_TH.csv")
+dat_TL_pa <- read_csv("spam2010V2r0_global_A_TL.csv")
+dat_TS_pa <- read_csv("spam2010V2r0_global_A_TS.csv")
+dat_TR_pa <- read_csv("spam2010V2r0_global_A_TR.csv")
+
+# Harvesting area----------------------------------
+dat_TA_ha <- read_csv("spam2010V2r0_global_H_TA.csv")
+dat_TI_ha <- read_csv("spam2010V2r0_global_H_TI.csv")
+dat_TH_ha <- read_csv("spam2010V2r0_global_H_TH.csv")
+dat_TL_ha <- read_csv("spam2010V2r0_global_H_TL.csv")
+dat_TS_ha <- read_csv("spam2010V2r0_global_H_TS.csv")
+dat_TR_ha <- read_csv("spam2010V2r0_global_H_TR.csv")
+
+# Create a spatial SPAM file 
+geo_spam=dplyr::select(dat_TA_pr, x,y, cell5m, name_adm1, iso3) 
+# %>% 
+# distinct_at(vars(name_adm1))
+
+# Make SPAM spatial 
+# projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
+# proj_mercator = "+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
+proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below 
+proj_meter = "+datum=WGS84 +units=m +no_defs"
+geo_spam1 <- st_as_sf(x = geo_spam,                         
+                      coords = c("x", "y"),
+                      crs = proj)
+
+# Load administrative unit shapefile which matches the SPAM adm1 zones
+Admin0 = st_read(here::here("Input_data","GAUL_GADM_SPAM_ULRIKE", "g2008_0_now","g2008_0.shp"))
+Admin1 = st_read(here::here("Input_data","GAUL_GADM_SPAM_ULRIKE", "g2008_1_now","g2008_1.shp")) #this is what we need
+Admin2 = st_read(here::here("Input_data","GAUL_GADM_SPAM_ULRIKE", "g2008_2_2020_11_02","g2008_2.shp")) #this is what we need
+
+
+# Define projection for GAULGADMSPAM
+proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below 
+proj_meter = "+datum=WGS84 +units=m +no_defs"
+Admin1 <- st_as_sf(x = Admin1,                         
+                   # coords = c("x", "y"),
+                   crs = proj)
+
+# Joining SPAM df and GAUL-GADM-SPAM --------------------------------------
+
+# Ulrike's comment: Shape files for GAUL-GADM-SPAM
+# Please note that the columns fips0, fips1 and fips2 are the ones which have the codes for admin level 0 (country), level 1 and level 2 which can be linked to SPAM results.
+# The matching names are in columns Adm0_name, name1 and name2.# 
+# Columns which have names O_fips1, O_fips2, O_name1 and O_name2 refer to an “old” classification system at level 1 and level 2. Its entries differ from the columns without “O_” in some EU countries and a few others where the statistic units did not match the admin units. Please note that the shape files themselves were not modified, only the entries in the dbf tables. 
+
+# adm1 unit match check
+geo_spam2 = dat_TA_pr %>% dplyr::select(iso3, cell5m, name_cntr, name_adm1, x, y) %>% 
+  distinct_at(vars(name_adm1), .keep_all = T) 
+
+# Define projection for GAULGADMSPAM
+proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below 
+proj_meter = "+datum=WGS84 +units=m +no_defs"
+geo_spam3 <- st_as_sf(x = geo_spam2,                         
+                   coords = c("x", "y"),
+                   crs = proj)
+
+
+# distinct GGS at NAM1
+adm1_distinct=Admin1 %>% as_tibble() %>% 
+  distinct_at(vars(NAME1), .keep_all = T) %>%
+  write_csv(., here::here("Input_data", "GGS_adm1_dist_full.csv"))
+
+# Converting GGS to dataframe for testing adm1 levels in excel 
+GGS_df_adm1 =Admin1 %>% dplyr::select(-geometry) %>% 
+  as_tibble() %>% 
+  # distinct_at(vars(NAME1), .keep_all = T) %>%
+  write_csv(., here::here("Input_data", "GGS_adm1_full.csv"))
+
+
+# convert to sf (spatial)
+proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below 
+proj_meter = "+datum=WGS84 +units=m +no_defs"
+adm1_distinct2 <- st_as_sf(x = adm1_distinct,                         
+                   # coords = c("x", "y"),
+                   crs = proj)
+
+# Spatial join 
+join_SPAM_adm1 = st_join(adm1_distinct2, geo_spam3, join = st_intersects)
+# 2857 - 2748 = 109 is the difference between join_SPAM_adm1 and adm1_distinct2
+
+# FUNcheck_incons = function(ped){
+#   ped1 = ped  %>% select(id, dam, sire)
+#   TestSire_random=ped1$sire==solution_pedsim2$Sire
+#   testParents= left_join(ped1, solution_pedsim2) %>%
+conS_check = join_SPAM_adm1 %>% 
+  rowwise() %>% 
+  mutate(testAdm1 = ifelse(NAME1==name_adm1, 1,0))%>%
+  mutate(testAdm0 = ifelse(ADM0_NAME==name_cntr, 1,0)) %>% 
+  as_tibble() %>% 
+  dplyr::select(NAME1, name_adm1, testAdm1, ADM0_NAME, name_cntr, testAdm0) %>%
+  write_csv(., here::here("Input_data", "ADm1_0_chck.csv"))
+