diff --git a/Raster extraction/Spatial_SPAM.R b/Raster extraction/Spatial_SPAM.R
new file mode 100644
index 0000000000000000000000000000000000000000..4c49067185c55e613b9fb2cd27defb9da4dfed9c
--- /dev/null
+++ b/Raster extraction/Spatial_SPAM.R
@@ -0,0 +1,218 @@
+easypackages::packages(c("tidyverse", 'sf', 'raster', 'sp', 'dplyr', 'spatialEco',
+                         'exactextractr', 'tibble', 'here', "readr", "magrittr", "tidyr",
+                         "readxl", "foreign", "data.table", "spData", "spDataLarge", "rgdal"))
+
+# remotes::install_github("Nowosad/spDataLarge")
+# setting project directory
+here('Input_data')
+
+# Raster to point extraction ---------------------------------------------
+# Making SPAM spatially explicit
+dat_spam = read_csv("spam2010V2r0_global_Y_TA.csv")
+dat_spam = dplyr::select(dat_spam, x, y, alloc_key, name_adm1, iso3)
+
+# Make SPAM spatial
+# projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
+# proj_mercator = "+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
+proj = "+proj=longlat +datum=WGS84 +no_defs" # taken from the raster file below; note that a longlat CRS is in degrees, so "+units=m" has no effect and is dropped here
+proj_meter = "+datum=WGS84 +units=m +no_defs"
+dat_cent <- st_as_sf(x = dat_spam,
+                     coords = c("x", "y"),
+                     crs = proj)
+
+# dat_long = st_as_sf(coords = c("lon", "lat"))
+# dat_cent = filter(dat_cent, iso3 == "FIN")
+# dat_cent = dat_cent[1:10, ]
+
+# Import raster datasets
+dat_rast = raster("D:/Raw_dat/Raw_dat/Phosphorus/nitrogen-fertilizer-global-geotif/NFertilizer_global_geotif/nfertilizer_global.tif")
+dat_rast_SOC = raster(here("Input_data", "sol_organic.carbon_usda.tif"))
+
+dat_rast_peat = raster(here("Input_data", "PeatlandMap_A3_300dpi_GeoTIF_final.tif"))
+length(dat_rast_peat)   # quick size check
+# is.na(dat_rast_peat)  # returns a whole logical raster; summary(dat_rast_peat) is cheaper
+
+dat_rast_ISCRIC = raster("D:/Raw_dat/Raw_dat/Soil maps/wise_05min_v12/wise_05min_v12/Grid/smw5by5min/w001001.adf")
+dat_clim = getData('worldclim', var = 'bio', res = 5)
+dat_rast_meantemp = dat_clim[[1]]
+
+# Set the coordinate reference system (CRS) (i.e., define the projection).
+projection(dat_rast_SOC) <- "+proj=longlat +datum=WGS84 +no_defs"
+
+plot(dat_clim[[1:4]])
+# Import polygons
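+
+# Before extracting, it is worth checking that the points and each raster agree
+# on the CRS; a mismatch silently shifts every buffer. A minimal sketch (using
+# dat_cent and dat_rast_SOC from above; compareCRS() is from the raster package):
+if (!raster::compareCRS(raster::projection(dat_rast_SOC), sf::st_crs(dat_cent)$proj4string)) {
+  dat_cent <- sf::st_transform(dat_cent, raster::projection(dat_rast_SOC)) # reproject the points, not the raster
+}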
+
+# Point extraction of raster values, added to the SPAM points --------
+funrast = function(r) { # r: the raster dataset from which to extract values
+  vals = raster::extract(r,        # raster layer
+                         dat_cent, # sf points used as buffer centres
+                         buffer = 5000, # buffer radius; raster::extract() interprets it in metres for lat/lon data, i.e. ~10x10 km (~5 km radius) per point
+                         fun = mean,    # statistic to extract
+                         df = TRUE)     # return a data.frame
+  vals$ID = dat_cent$alloc_key # carry the identifier of the original df over to the extracted df
+  merge(dat_spam, vals, by.x = 'alloc_key', by.y = 'ID') # merge the two files together
+}
+
+system.time(
+  funrast(dat_rast)
+)
+
+# Extraction
+ex_Nfert_spam = funrast(dat_rast)
+ex_soil_spam = funrast(dat_rast_ISCRIC)
+ex_SOC_spam = funrast(dat_rast_SOC)
+# started the SOC extraction at 7.30 on the 19th
+
+filter(ex_Nfert_spam, nfertilizer_global > 50) # spot-check the extraction (the object was called `test` in an earlier draft)
+summary(ex_Nfert_spam)
+
+# Zonal statistics -------------------------------------------------------
+# Load polygons (note: setwd() returns the *previous* directory, so the old
+# `wd = setwd(...)` pattern pointed read_sf() at the wrong folder)
+gadm_dir = "C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Raw_dat/Zonation/GADM/gadm36_shp"
+
+# Load and check shapefile
+shape <- read_sf(dsn = gadm_dir, layer = "gadm36") # multipolygons containing all admin levels globally
+# class(shape)
+# shape$GID
+shape = dplyr::select(shape, 'UID', 'geometry', 'GID_0')
+# y = filter(shape, GID_0 == 'ALG')
+
+# Load raster
+dat_rast = raster("D:/Raw_dat/Raw_dat/Phosphorus/nitrogen-fertilizer-global-geotif/NFertilizer_global_geotif/nfertilizer_global.tif")
+
+# Perform zonal statistics ------------------------------------------------
+FunZone = function(rast, shape) { # raster and vector
+  z = zonal.stats(shape, rast, stats = c("mean")) # (the draft referenced an undefined `d` here)
+  z$UID <- shape$UID # carry the identifier of the original df over to the extracted df
+  merge(shape, z, by.x = 'UID', by.y = 'UID') # merge the two files together
+}
+
+dat = FunZone(dat_rast, shape) # the draft used `dat` before ever calling FunZone()
+dat$mean.nfertilizer_global
+summary(dat$mean.nfertilizer_global)
+
+# Point = overlay(rast, df) # does not work
+
+BufferFun <- function(rast, ds) {
+  raster::extract(rast, # raster layer
+                  ds,   # sf points used as buffer centres
+                  buffer = 5000, # buffer radius in metres, ~10x10 km (~5 km radius) per point
+                  fun = mean,    # statistic to extract
+                  df = TRUE)     # return a data.frame
+  # , na.rm = TRUE) # optionally drop NA cells
+}
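+
+# raster::extract() with a buffer is slow for millions of points. Since
+# exactextractr is loaded anyway, a faster rough equivalent (a sketch, not a
+# drop-in replacement) is to buffer the points once and take the
+# coverage-weighted mean; with sf >= 1.0 (s2 enabled), st_buffer() takes the
+# distance in metres for geographic coordinates:
+BufferFunFast <- function(rast, pts) {
+  polys <- sf::st_buffer(pts, dist = 5000)          # one ~5 km buffer per point
+  exactextractr::exact_extract(rast, polys, "mean") # coverage-weighted mean per buffer
+}
+# e.g. dat_cent$nfert_mean_fast <- BufferFunFast(dat_rast, dat_cent)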
+
+# cent_mean_FIN$ID = ds$alloc_key # leftover scratch: `cent_mean_FIN` and `ds` are not defined in this script
+# centroids <- merge(dat, cent_mean_FIN, by.x = 'alloc_key', by.y = 'ID')
+
+# Joining GADM with SPAM -------------------------------------------------------
+# Load and check shapefile
+shape = read_sf(dsn = here("Input_data"), layer = "gadm36") # multipolygons containing all admin levels globally
+shape = dplyr::select(shape, 'UID', 'geometry', 'GID_0', 'GID_1')
+# shape_fil = filter(shape, GID_0 == 'FIN')
+
+# Making SPAM spatially explicit
+dat_spam = read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv"))
+dat_spam = dplyr::select(dat_spam, 'x', 'y', 'alloc_key')
+
+# Make SPAM spatial
+projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
+dat_cent <- st_as_sf(x = dat_spam,
+                     coords = c("x", "y"),
+                     crs = projcrs)
+
+class(dat_cent)
+# dat_cent = filter(dat_cent, iso3 == "FIN")
+
+SPAM_GADM_JOIN_GIS_left = st_join(dat_cent, shape, join = st_within, left = T) # https://postgis.net/docs/ST_Within.html
+# head(SPAM_GADM_JOIN_GIS_left)
+# class(SPAM_GADM_JOIN_GIS_left)
+
+# st_write() needs a layer name when dsn is a directory; the non-left variant
+# SPAM_GADM_JOIN_GIS was never created, so only the left join is written
+st_write(obj = SPAM_GADM_JOIN_GIS_left, dsn = here("Input_data"),
+         layer = "SPAM_GADM_JOIN_GIS_left", driver = "ESRI Shapefile")
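+
+# A quick coverage check on the join: points outside every GADM polygon come
+# back with NA identifiers (a sketch using SPAM_GADM_JOIN_GIS_left from above):
+sum(is.na(SPAM_GADM_JOIN_GIS_left$UID)) # SPAM centroids that matched no polygon
+# coastal cells are the usual culprits; st_join(..., join = st_nearest_feature)
+# is one way to rescue them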
+
+# Joining SPAM and GAUL --------------------------------------------------
+EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA",
+            "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR")
+
+EU_longGAUL = c("Austria", "Belgium", "Finland", "Bulgaria", "Croatia", "Czech Republic", "Denmark", "Spain", "France",
+                "Germany", "Greece", "Ireland", "Italy", "Latvia", "Lithuania", "Luxembourg", "Malta", "Netherlands",
+                "Poland", "Portugal", "Rumania", "Slovakia", "Slovenia", "Estonia", "Sweden",
+                "U.K. of Great Britain and Northern Ireland") # leading spaces removed so the names match GAUL; check the GAUL spelling of Rumania/Romania
+
+# dat_gaul_asap = read_sf(here("Input_data", "gaul1_asap.shp")) # modified GAUL map (less adm1 detail)
+dat_gaul = read_sf(here("Input_data", "g2008_1.shp")) # actual GAUL map
+dat_spam = read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv"))
+dat_spam_EU = dat_spam %>%
+  dplyr::select(alloc_key, name_adm1, name_cntr, x, y, iso3) %>%
+  filter(iso3 %in% EU_iso3)
+
+# Make the SPAM dataset spatial
+projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
+dat_spam_EU <- st_as_sf(x = dat_spam_EU, # convert the filtered EU subset, not the full dat_spam
+                        coords = c("x", "y"),
+                        crs = projcrs)
+
+dat_gaul = filter(dat_gaul, ADM0_NAME %in% EU_longGAUL)
+
+# Joining SPAM and GAUL for the EU-27
+SPAM_GAUL_JOIN_GIS = st_join(dat_spam_EU, dat_gaul, join = st_within, left = T) # https://postgis.net/docs/ST_Within.html
+SPAM_GAUL_JOIN_GIS_intersect = st_join(dat_spam_EU, dat_gaul, join = st_intersects, left = T)
+
+# Joining the geometries of GAUL and SPAM
+dat_gaul_fil = dat_gaul %>% dplyr::select(name1, geometry) %>%
+  slice_head(n = 3)
+geometry_join = left_join(SPAM_GAUL_JOIN_GIS, dat_gaul_fil, by = "name1")
+
+# Zonal statistics > calculating the mean SOC for the EU-27
+SPAM_GAUL_JOIN_GIS_fil = SPAM_GAUL_JOIN_GIS %>%
+  dplyr::select(alloc_key, geometry) %>% # the pipe was missing here
+  top_n(n = 20)
+
+c = zonal.stats(dat_gaul_fil, dat_rast_SOC, stats = c("mean"))
+
+# Extracting SOC on pixel level -------------------------------------------
+dat_spam_EU_fil = dat_spam_EU %>%
+  top_n(n = 50)
+dat_cent <- dat_spam_EU_fil # already sf points, so no second st_as_sf() is needed
+
+# Testing the NA removal
+# SOC_rast_wNA = BufferFun(SOC_rast, dat_cent) # `SOC_rast` is not defined in this script
+SOC_rast_NoNa = BufferFun(dat_rast_SOC, dat_cent) # 5000 m buffer, without NA
+
+SOC_dat_spam_EU = cbind(SOC_rast_NoNa, dat_spam_EU_fil) # presumably the extracted df, not a raster, was meant here
+write_csv(SOC_dat_spam_EU, here("Input_data", "SOC_pixel_EU.csv"))
+
+# Calculating percentages and the peat index
+SOC_EU_pix_perc = SOC_dat_spam_EU %>%
+  mutate(percentageSOC = sol_organic.carbon_usda / 2) %>%
+  mutate(peat_index = if_else(percentageSOC > 30, "Peat", "Non-peat")) %>%
+  write_csv(here("Input_data", "SOCperc_pixel_EU_Peat.csv"))
+
+# Then aggregate to adm1 level and bind, as sketched below.
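+
+# A sketch of that adm1 aggregation (assuming SOC_EU_pix_perc still carries
+# iso3 and name_adm1 from the SPAM table): peat share and mean SOC per unit.
+SOC_adm1 <- SOC_EU_pix_perc %>%
+  group_by(iso3, name_adm1) %>%
+  summarise(mean_SOC_perc = mean(percentageSOC, na.rm = TRUE),
+            share_peat    = mean(peat_index == "Peat", na.rm = TRUE),
+            .groups = "drop")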
+ + +cent_mean_FIN$ID = ds$alloc_key # assigning the identifier of the original df to the extracted df +centroids <- merge(dat, cent_mean_FIN, by.x = 'alloc_key', by.y = 'ID') # merge the two files together diff --git a/Raster extraction/Spatial_SPAM2.R b/Raster extraction/Spatial_SPAM2.R new file mode 100644 index 0000000000000000000000000000000000000000..5f683f185cad25fb8465b5ea096c94387b00659c --- /dev/null +++ b/Raster extraction/Spatial_SPAM2.R @@ -0,0 +1,327 @@ +easypackages::packages(c("tidyverse", 'sf', 'raster','sp','dplyr','spatialEco', + 'exactextractr','tibble', 'here', "readr", "margrittr","tydr", + "readxl", "foreign", "data.table", "spData","spDataLarge", "purrr", "rgdal" + )) + +remotes::install_github("Nowosad/spDataLarge") +setwd(here("Input_data")) + +# Raster to point extraction --------------------------------------------- +# Making SPAM spatial explicit +dat_spam=read_csv("C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Raw_dat/Zonation/SPAM/2010/spam2010v2r0_global_yield.csv/spam2010V2r0_global_Y_TA.csv") +dat_spam=dplyr::select(dat_spam, x,y, alloc_key, name_adm1, iso3) + +# Make SPAM spatial +# projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0" +# proj_mercator = "+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs" +proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below +proj_meter = "+datum=WGS84 +units=m +no_defs" +dat_cent <- st_as_sf(x = dat_spam, + coords = c("x", "y"), + crs = proj) + + + +# dat_long = st_as_sf(coords = c("lon", "lat")) + +# dat_cent = filter(dat_cent, iso3=="FIN") +# dat_cent=dat_cent[1:10,] + + +# import raster datasets +dat_rast = raster("D:/Raw_dat/Raw_dat/Phosphorus/nitrogen-fertilizer-global-geotif/NFertilizer_global_geotif/nfertilizer_global.tif") +dat_rast = raster("D:/Raw_dat/Raw_dat/Phosphorus/nitrogen-fertilizer-global-geotif/NFertilizer_global_geotif/nfertilizer_global.tif") +dat_rast = raster("D:/Raw_dat/Raw_dat/Phosphorus/nitrogen-fertilizer-global-geotif/NFertilizer_global_geotif/nfertilizer_global.tif") +dat_rast_SOC = raster(here("Input_data", "sol_organic.carbon_usda.tif")) +dat_rast_peat = raster(here("Input_data", "PeatlandMap_A3_300dpi_GeoTIF_final.tif")) +dat_rast_ISCRIC = raster("D:/Raw_dat/Raw_dat/Soil maps/wise_05min_v12/wise_05min_v12/Grid/smw5by5min/w001001.adf") +dat_clim=getData('worldclim', var='bio', res=5) +dat_rast_meantemp = dat_clim[[1]] + +# Set the coordinate reference system (CRS) (i.e., define the projection). +projection(dat_rast_SOC) <- "+proj=longlat +datum=WGS84 +no_defs +units=m" + +plot(clim[[1:4]]) +# Import Polygons + +# Point extraction of raster values and add it to the SPAM point -------- +funrast = function(r){ #insert the raster dataset from which you would like to extract values + r = raster::extract(r, # raster layer + dat_cent, # SF with centroids for buffer + buffer = 5000, # buffer size, units depend on CRS. 
Here in meters.10x10km(~5km radius) + fun=mean, # what value to extract + df=TRUE) # creating a df TRUE + r$ID = dat_cent$alloc_key # assigning the identifier of the original df to the extracted df + dat_spam <- merge(dat_spam, r, by.x = 'alloc_key', by.y = 'ID')# merge the two files together +} + + + +# Extraction +ex_Nfert_spam = funrast(dat_rast) +ex_soil_spam = funrast(dat_rast_ISCRIC) +ex_SOC_spam = funrast(dat_rast_SOC) +# start SOC extraction at 7.30, 19.th + +filter(test, nfertilizer_global>50) +summary(test) + +# Zonal statistics ------------------------------------------------------- +# Load polygons +wd=setwd("C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Raw_dat/Zonation/GADM/gadm36_shp") + +# Load and check shapefile +shape <- read_sf(dsn = here("Input_data"), layer = "gadm36") # multipolygons containing all admin levels globally + +# class(shape) +# shape$GID +shape=dplyr::select(shape, 'UID', 'geometry', 'GID_0', "GID_1") +# y=filter(x, GID_0 == 'ALG') + +shape_gaul=read_sf(here("Input_data", "g2008_1.shp")) #actual GAUL map + + +# Load raster +rast=dat_rast = raster("D:/Raw_dat/Raw_dat/Phosphorus/nitrogen-fertilizer-global-geotif/NFertilizer_global_geotif/nfertilizer_global.tif") + + +# Perform zonal statistics ------------------------------------------------ +FunZone=function(rast, shape){ # raster and vector + c=zonal.stats(shape, d, stats = c("mean")) + c$UID <- shape$UID # assigning the identifier of the original df to the extracted df + dat <- merge(shape, c, by.x = 'UID', by.y = 'UID') # merge the two files togethere +} + +dat$mean.nfertilizer_global +summary(dat$mean.nfertilizer_global) + + + +c=zonal.stats(y, dat_rast, stats = c("mean")) +c$UID <- x$UID # assigning the identifier of the original df to the extracted df +dat <- merge(shape, c, by.x = 'UID', by.y = 'UID') # merge the two files together + +# Point = overlay(rast, df) #does not work + +BufferFun <- function(rast,ds) { + raster::extract(rast, # raster layer + ds, # SF with centroids for buffer + buffer = 5000, # buffer size, units depend on CRS. 
Here in meters.10x10km(~5km radius) + fun=mean, # what value to extract + df=TRUE) + # , # creating a df TRUE + # na.rm=TRUE) # if is.na is true +} + + +cent_mean_FIN$ID = ds$alloc_key # assigning the identifier of the original df to the extracted df +centroids <- merge(dat, cent_mean_FIN, by.x = 'alloc_key', by.y = 'ID') # merge the two files together + + +# Clip raster ------------------------------------------------------------ +# https://www.earthdatascience.org/courses/earth-analytics/lidar-raster-data-r/crop-raster-data-in-r/ +crop_extent <- read_sf(here("Input_data", "shape_gaul_fin_adm1.shp")) + +# Plot raster +plot(dat_rast_SOC_perc) #validated as hald of dat_rast_SOC below +# plot(dat_rast_SOC) + +# Plot shapefile +plot(crop_extent %>%dplyr::select(ADM1_CODE), + main = "Shapefile imported into R - crop extent", + axes = T, + border = "blue") + +# crop the lidar raster using the vector extent +SOC_test_perc <- crop(dat_rast_SOC_perc, crop_extent) +plot(crop, main = "Cropped soc perc") + +# add shapefile on top of the existing raster +plot(crop_extent, add = TRUE) + + + + +# Joining GADM with SPAM ------------------------------------------------------- +# Load and check shapefile +shape=read_sf(dsn = here("Input_data"), layer = "gadm36") # multipolygons containing all admin levels globally +shape=dplyr::select(shape, 'UID', 'geometry', 'GID_0', 'GID_1') +# shape_fil=filter(shape, GID_0 == 'FIN') + +# Making SPAM spatial explicit +dat_spam=read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv")) +dat_spam=dplyr::select(dat_spam, 'x','y', 'alloc_key') + +# Make SPAM spatial +projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0" +dat_spam <- st_as_sf(x = dat_spam, + coords = c("x", "y"), + crs = projcrs) + +class(dat_cent) +# dat_cent = filter(df, iso3=="FIN") + + +SPAM_GADM_JOIN_GIS_left = st_join(dat_spam, shape, join = st_within, left = T) #https://postgis.net/docs/ST_Within.html +# head(SPAM_GADM_JOIN_GIS) +# class(SPAM_GADM_Join) + +st_write(obj = SPAM_GADM_JOIN_GIS, dsn = here("Input_data"), driver = "ESRI Shapefile") +st_write(obj = SPAM_GADM_JOIN_GIS_left, dsn = here("Input_data"), driver = "ESRI Shapefile") + + +# Joining SPAM and GAUL and GADM EU -------------------------------------------------- +EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA", + "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR") + +EU_longGAUL = c("Austria", "Belgium","Finland", "Bulgaria", "Croatia", "Czech Republic", "Denmark", "Spain", "France", + "Germany", "Greece", "Ireland", " Italy", " Latvia", " Lithuania", "Luxembourg","Malta", "Netherlands", + "Poland", "Portugal", "Rumania", "Slovakia", "Slovenia", "Estonia", "Sweden", "U.K. 
of Great Britain and Northern Ireland") 
+
+dat_gaul = read_sf(here("Input_data", "gaul1_asap.shp"))
+
+dat_spam = read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv"))
+dat_spam_EU = dat_spam %>%
+  dplyr::select(alloc_key, name_adm1, name_cntr, x, y, iso3) %>%
+  filter(iso3 %in% EU_iso3)
+
+# Make the SPAM dataset spatial
+projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
+dat_spam_EU <- st_as_sf(x = dat_spam_EU,
+                        coords = c("x", "y"),
+                        crs = projcrs)
+
+# read_sf() already returns sf polygons; they only need their CRS aligned.
+# st_as_sf(coords = ...) is for point tables and would fail here.
+dat_gaul <- st_transform(dat_gaul, projcrs)
+
+# Source: http://worldmap.harvard.edu/data/geonode:g2008_1
+dat_gaul_adm1 = read_sf(here("Input_data", "g2008_1.shp"))
+dat_gaul_adm1 <- st_transform(dat_gaul_adm1, projcrs)
+
+GADM <- read_sf(dsn = here("Input_data"), layer = "gadm36") # multipolygons containing all admin levels globally
+GADM <- st_transform(GADM, projcrs) # the draft assigned this to dat_gaul, overwriting the GAUL layer
+
+# dat_gaul_EU = dat_gaul %>% filter(name0 %in% EU_longGAUL)
+
+# # Joining SPAM and GAUL for EU27
+# # SPAM_GAUL_JOIN_GIS = st_join(
+# # Zonal statistics > calculating the mean SOC for EU27
+# SPAM_GAUL_JOIN_GIS_fil = SPAM_GAUL_JOIN_GIS %>%
+#   dplyr::select(alloc_key, geometry) %>%
+#   top_n(n = 20)
+
+# c = zonal.stats(dat_gaul_fil, dat_rast_SOC, stats = c("mean"))
+# (dat_spam_EU, dat_gaul, join = st_within, left = T) # https://postgis.net/docs/ST_Within.html
+SPAM_GAUL_JOIN_GIS_intersect = st_join(dat_spam_EU, dat_gaul_adm1, join = st_intersects, left = T)
+SPAM_GAUL_GADM = st_join(SPAM_GAUL_JOIN_GIS_intersect, GADM, join = st_intersects, left = T)
+
+SPAM_GAUL_GADM_select = SPAM_GAUL_GADM %>%
+  dplyr::select(iso3, name_cntr, name_adm1, geometry, # SPAM
+                name1, name1_shr, name0,              # GAUL
+                GID_1, GID_0, NAME_1, VARNAME_1) %>%  # GADM
+  write_sf(here("Input_data", "SPAM_GAUL_SF_join_intersect.shp"), driver = "ESRI Shapefile")
+
+# Adding the GADM polygon geometries to the SPAM_GAUL_GADM dataset
+GADM_df = as_tibble(GADM)
+SPAM_GADM_GAUL_df = as_tibble(SPAM_GAUL_GADM_select) %>%
+  dplyr::select(geometry, GID_1) %>%
+  left_join(., GADM_df, by = "GID_1")
+
+# Extracting peat / non-peat -------------------------------------------
+# Reclassify peat = 1, non-peat = 0; peat is defined as SOC >= 30% organic matter
+
+# Create the classification vector
+dat_rast_SOC_perc <- dat_rast_SOC / 2
+
+reclass_df <- c(0, 30, 0,
+                30, Inf, 1)
+writeRaster(dat_rast_SOC_perc, here("Input_data", "dat_SOC_USDA_percent.tif")) # .tif, so peat.R can read it back
+
+# Reshape the vector into a matrix with columns and rows
+reclass_m <- matrix(reclass_df,
+                    ncol = 3,
+                    byrow = TRUE)
+# Reclassify the raster using the reclass matrix
+SOC_reclas <- reclassify(dat_rast_SOC_perc, # was misspelled dat_rst_SOC_perc
+                         reclass_m)
+
+writeRaster(SOC_reclas, here("Input_data", "SOC_reclass_perc.tif"))
+SOC_reclass = raster(here("Input_data", "SOC_reclass_perc.tif"))
+cellStats(SOC_reclass, stat = max) # cellStats (capital S) is the raster function
+
+shape_gaul = read_sf(here("Input_data", "g2008_1.shp")) # actual GAUL map
+shape_gaul = dplyr::select(shape_gaul, 'G2008_1_ID', 'ADM1_CODE', "ADM0_CODE", 'ADM0_NAME', 'ADM1_NAME', "CONTINENT", "REGION", "geometry")
+
+shape2 = shape_gaul %>% slice(1:1000)
+
+# ?raster::extract
+EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA",
+            "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR")
+# write.csv(EU_iso3, here("Input_data", "EU27_iso3_lst.csv"))
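+
+# Quick sanity check on the reclassification above (a sketch; peat cells are
+# coded 1 and non-peat 0, so the mean over non-NA cells is the global peat share):
+raster::freq(SOC_reclas, useNA = "no") # cell counts for 0 (non-peat) and 1 (peat)
+cellStats(SOC_reclas, stat = "mean")   # fraction of classified cells that are peat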
+length(EU_iso3) #27 member states > the list comes from gams + +# Check if the EU countries iso3 are all present in dat.phys etc. +# EU_iso3 %in% dat.phys$iso3 #Validation if all EU27 countries are in the dfs below +shapeGL = shape_gaul %>% + filter(ADM1_CODE == 1244) %>% + write_sf(here("Input_data", "shape_gaul_fin_adm1.shp")) + + + +freqs = exact_extract(SOC_reclas, shapeGL, function(value, coverage_fraction) { + sum(coverage_fraction[value == 1], na.rm = T) / sum(coverage_fraction, na.rm = T)}) + +BufferFun <- function(rast,ds) { + exact_extract(rast, # raster layer + as(ds, "sf"), # SF with centroids for buffer + buffer = 5000) + #, # buffer size, units depend on CRS. Here in meters.10x10km(~5km radius) + # fun=mean, # what value to extract + # df=TRUE) + # , # creating a df TRUE + # na.rm=TRUE) # if is.na is true +} + +?exact_ectract + +min(SOC_reclas) +cellStats(SOC_reclas, count) + + +dat_spam_EU_fil = dat_spam_EU %>% + top_n(n=50) +proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below +proj_meter = "+datum=WGS84 +units=m +no_defs" +dat_cent <- st_as_sf(x = dat_spam_EU_fil, + coords = c("x", "y"), + crs = proj) +#testing the NA remove +SOC_rast_wNA = BufferFun(SOC_rast,dat_cent) +SOC_rast_NoNa = BufferFun(dat_rast_SOC,dat_cent) #5000 m buffer, witout nA + + +SOC_dat_spam_EU = cbind(SOC_rast,dat_spam_EU) +write_csv(SOC_dat_spam_EU, here("Input_data", "SOC_pixel_EU.csv")) + +# Calculating percentages and Peat index +SOC_EU_pix_perc = SOC_dat_spam_EU %>% + mutate(percentageSOC = sol_organic.carbon_usda/2) %>% + mutate(peat_index = if_else(percentageSOC > 30, "Peat", "Non-peat")) %>% + write_csv(here("Input_data", "SOCperc_pixel_EU_Peat.csv")) + +# Then aggregate to adm1 level and bind. + + +cent_mean_FIN$ID = ds$alloc_key # assigning the identifier of the original df to the extracted df +centroids <- merge(dat, cent_mean_FIN, by.x = 'alloc_key', by.y = 'ID') # merge the two files together diff --git a/Raster extraction/peat.R b/Raster extraction/peat.R new file mode 100644 index 0000000000000000000000000000000000000000..fb4ed106c55ffbe2ef127e5e5a988a9ba0ae81b4 --- /dev/null +++ b/Raster extraction/peat.R @@ -0,0 +1,363 @@ +easypackages::packages(c("tidyverse", 'sf', 'raster','sp','dplyr','spatialEco', + 'exactextractr','tibble', 'here', "readr", "margrittr","tydr", + "readxl", "foreign", "data.table", "spData","spDataLarge", "purrr", "rgdal", "lwgeom" +)) + +# Loading vector data +dat_spam=read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv")) +dat_spam=dplyr::select(dat_spam, x,y, alloc_key, name_adm1, iso3) + +# Make SPAM spatial +# projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0" +# proj_mercator = "+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs" +proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below +proj_meter = "+datum=WGS84 +units=m +no_defs" +dat_cent <- st_as_sf(x = dat_spam, + coords = c("x", "y"), + crs = proj) + + +shape_gaul=read_sf(here("Input_data", "g2008_1.shp")) #actual GAUL map +shape_gaul=dplyr::select(shape_gaul, 'G2008_1_ID', 'ADM1_CODE', "ADM0_CODE", 'ADM0_NAME', 'ADM1_NAME', "CONTINENT", "REGION", "geometry") + +# country_mapping +cntr_map_global = read_sf(here("Input_data", "GAUL2008_SPAM_LINK", "GAUL2008_SPAM_LINK.shp")) #checked the map. 
IS te right GAUL map + +# SUbsetting EU-27 +EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA", + "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR") + +cntr_EU2_gaul_shape = cntr_map_global %>% #Pixel level data + filter(iso3 %in% EU_iso3) %>% + dplyr::select(alloc_key,G2008_1_ID,iso3,name_cntr,name_adm1,ADM1_CODE, ADM0_CODE) +# distinct_at(vars(G2008_1_ID, iso3)) %>% +# st_join(., shape_gaul, join = st_intersects, right = T) + +# With this shape file we can clip the extent of EU27 area +gaul_EU27 = st_join(shape_gaul, cntr_EU2_gaul_shape, join = st_intersects, left = T) %>% + drop_na() %>% + distinct_at(vars(G2008_1_ID.x, alloc_key, ADM1_NAME, ADM0_NAME, geometry)) %>% + rename( + # ADM1_NAME =ADM1_NAME.x, + # ADM0_NAME =ADM0_NAME.x, + G2008_1_ID =G2008_1_ID.x) %>% + st_as_sf() + +# write_sf(gaul_EU27, here("Input_data", "gaul_EU27.shp")) +read_sf(here("Input_data", "gaul_EU27.shp")) + +# tHIS IS FOR Futther clipping +gaul_uniq_EU27 = gaul_EU27 %>% + as_tibble() %>% + dplyr::select(-alloc_key) %>% + distinct_all() %>% + st_as_sf #%>% + # write_sf(here("Input_data", "gaul_uniq_EU27.shp")) + +gaul_unique_EU27 = read_sf((here("Input_data", "gaul_uniq_EU27.shp"))) + +# verifying the clipping. Issue: looks skewed in the norther countries.. +plot(gaul_uniq_EU27 %>% dplyr::select(ADM1_NAME)) + +# # Check if 27 EU memeberstates > CORRECT! +# lengthEU27 = gaul_EU27 %>% +# as_tibble() %>% +# distinct_at(vars(ADM0_NAME)) + +# Loading raster data +dat_rast_SOC = raster(here("Input_data", "sol_organic.carbon_usda.tif")) + + +# dat_rast_SOC_perc <- dat_rast_SOC/2 #conversion to percentage according to openlandmap website +# writeRaster(dat_rast_SOC_perc, here("Input_data", "dat_SOC_USDA_percent.tiff")) #validated +dat_rast_SOC_perc=raster(here("Input_data", "dat_SOC_USDA_percent.tif")) +# crs(dat_rast_SOC_perc) + +# Reclassify raster ------------------------------------------------------ +# https://www.earthdatascience.org/courses/earth-analytics/lidar-raster-data-r/classify-raster/ + +# # Creating threshold for reclassification +# reclass_df <- c(0, 30, 0, +# 30, Inf, 1) +# +# # reshape the object into a matrix with columns and rows +# reclass_m <- matrix(reclass_df, +# ncol = 3, +# byrow = TRUE) +# # reclassify the raster using the reclass object - reclass_md +# SOC_reclas <- reclassify(dat_rst_SOC_perc, +# reclass_m) +# +# writeRaster(SOC_reclas, here("Input_data", "SOC_reclass_perc.tif")) +# SOC_reclass = raster(here("Input_data", "SOC_reclass_perc.tif")) +# + +# Clip raster ------------------------------------------------------------ +# https://www.earthdatascience.org/courses/earth-analytics/lidar-raster-data-r/crop-raster-data-in-r/ +crop_extent <- read_sf(here("Input_data", "gaul_uniq_EU27.shp")) + +# Plot raster +# plot(dat_rast_SOC_perc) #validated as hald of dat_rast_SOC below +# # plot(dat_rast_SOC) +# +# # Plot shapefile +# plot(crop_extent %>%dplyr::select(ADM1_CODE), +# main = "Shapefile imported into R - crop extent", +# axes = T, +# border = "blue") +# +# # crop the lidar raster using the vector extent +# SOC_test_perc <- crop(dat_rast_SOC_perc, crop_extent) +# plot(crop, main = "Cropped soc perc") +# +# # add shapefile on top of the existing raster +# plot(crop_extent, add = TRUE) +# +# plot(gaul_uniq_EU27) +# # EU raster CLIP +EU27_area_clip <- crop(dat_rast_SOC_perc, gaul_uniq_EU27) +# plot(EU27_area_clip) +# plot(dat_rast_SOC_perc) +writeRaster(EU27_area_clip, here("Input_data", 
"EU27Areaclip.tif")) +EU27_area_clip = raster(here("Input_data", "EU27Areaclip.tif")) +# Note: takes only a few seconds for EU + +## Raster to point polygon (https://geocompr.robinlovelace.net/geometric-operations.html#spatial-vectorization) +Sys.time() +SOC_perc_EU27_points = rasterToPoints(EU27_area_clip, spatial = TRUE) +Sys.time() + +save(SOC_perc_EU27_points,file=here("Input_data", "SOC_points_perc_EU.RData")) + +gc(reset=T) + +Sys.time() +st_as_sf(SOC_perc_EU27_points) +Sys.time() + + + +# # Alternatively to convert raster to points +# SOC_perc_EU27_points_sf = st_as_sf(EU27_area_clip, as_points = TRUE, merge = FALSE) +# ?st_as_sf +# +# +# write_sf(SOC_perc_EU27_points, here("Input_data", "SOC_perc_EU27_points.shp")) # memory exhaust +# Sys.time() +# # max(SOC_points) +# > summary(SOC_points$sol_organic.carbon_usda) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 0.000 4.000 5.000 5.877 7.000 60.000 + + +# HWSD map ---------------------------------------------------------------- +# Here the polygon is imported - this shape file was created in Qgis +SOC_perc_EU27Q = st_read(here("Input_data","point_soc_perc_EU.shp")) + +gaul_unique_EU27 = read_sf((here("Input_data", "gaul_uniq_EU27.shp"))) +SOC_topsoil_SOCperc_HWSD_Q= st_read(here("Input_data","SOC_topsoil_SOCperc_HWSD_Q.shp"), + agr = c(SOC = "constant")) +SOC_topsoil_SOCperc_HWSD_Q +## Spatial Join with SPAM pixels +# All SOC points are remained and only in next step summarized +Sys.time() +Join_SOC_gaul_EU27_HWSD = st_join(SOC_topsoil_SOCperc_HWSD_Q, gaul_unique_EU27, join = st_intersects, left = T) +Sys.time() +# +# intersection = st_intersection(SOC_topsoil_SOCperc_HWSD_Q,gaul_unique_EU27)# +# Join_SOC_gaulLeft_EU27_HWSD = st_join(gaul_unique_EU27, SOC_topsoil_SOCperc_HWSD_Q) +# plot(Join_pointSOC_gaul_EU27_HWSD) +# plot(gaul_uniq_EU27) + +# Recalulating area - since we do not have point polygons +SOC_HWSD = Join_SOC_gaul_EU27_HWSD %>% + # rowwise() %>% + mutate(area = geometry %>% + st_zm() %>% + st_area()) +# test area +lapland = SOC_HWSD %>% filter(ADM1_NAME == "Lapland") %>% + as_tibble() %>% + summarise(sum(area)) +#Finland Lappland = 403498.752337km2 - h + +# Define new factors peat nonpeat based on SOC condition +SOC_prop_HWSD = SOC_HWSD %>% + rowwise() %>% + mutate(peat = ifelse(SOC >= 30, "peat", "non_peat")) %>% #rowwise assigning peat non_peat as grouping variabkle + group_by(ADM1_NAME, peat) %>% + mutate(area_adm1 = sum(area)) %>% + ungroup() %>% + distinct_at(.vars = 4) + +# Here we calulate the share of peat and non peat per adm1 units +# computationally intense > takes a few minutes only for EU +SOC_prop_HWSD1 = SOC_prop_HWSD %>% + group_by(ADM1_NAME, ADM0_NAME, peat) %>% + summarise(area_n_peat = mean(area_adm1)) + +SOC_prop_HWSD2= SOC_prop_HWSD1 %>% + as_tibble() %>% + group_by(ADM1_NAME) %>% + mutate(area_peat_prop = (area_n_peat)/sum(area_n_peat)*100) #%>% + # write_sf(here("Input_data", "SOC_perc_adm1_EU27_HWSD.shp")) + +gaul_tbl = gaul_unique_EU27 %>% + as_tibble() %>% + dplyr::select(G2008_1_ID, ADM0_NAME, ADM1_NAME) + +# Creating a csv file without eometries +SOC_gaul_HWSD_EU27_PEAT=SOC_prop_HWSD2 %>% + dplyr::select(ADM0_NAME, ADM1_NAME, peat, area_peat_prop) %>% + right_join(.,gaul_tbl, by="ADM1_NAME") %>% + dplyr::select(-G2008_1_ID) %>% + group_by(ADM1_NAME, peat) %>% + distinct() %>% + write_csv(here("Input_data", "SOC_gaul_HWSD_EU27_PEAT.csv")) + + + + +# Join back the ADM0 +PEAT_gaul_EU27_HWSD = st_join(SOC_prop_HWSD2, gaul_unique_EU27, join = st_intersects, left = T) + + +# write_csv(SOC_prop_HWSD2, 
here( "Input_data", "SOC_perc_adm1_EU27_HWSD.csv" )) +peat = read_csv(here("Input_data", "SOC_perc_adm1_EU27_HWSD.csv")) + +# Checking the peat data +# Checking peat tibble data = area per peat and country +SOC_gaul_HWSD_EU27_PEAT_check = SOC_gaul_HWSD_EU27_PEAT%>% # Applying group_by & summarise + group_by(ADM0_NAME.x, peat) %>% + summarize(mean(area_peat_prop)) %>% + write_csv(here("Input_data", "SOC_perc_adm0ag_EU27_HWSD.csv")) + + + +peat2 = peat %>% + group_by(ADM1_NAME, peat) %>% + summarise(area_peat_prop) %>% + st_as_sf() + st_join() + +# max peat values. +peatmax = peat %>% + filter(peat == "peat") %>% + arrange() + +Gaul_area = gaul_unique_EU27 %>% + rowwise() %>% + mutate(area = geometry %>% + st_zm() %>% + st_area()) +#area finland +HWSDSOC_area =SOC_topsoil_SOCperc_HWSD_Q%>% + rowwise() %>% + mutate(area = geometry %>% + st_zm() %>% + st_area()) +#area finland + +# Area checked for Lapland finland > real=10000km2. here=98859km2. + +# Join data back to + + + +# Removing NAs - areas which are not in the area of EU27 show NA +SOC_GAUL = Join_pointSOC_gaul_EU27 %>% + drop_na() %>% + + + + + + +write_sf(Join_pointSOC_gaul_EU27, here("Input_data", "Join_pointSOC_gaul_EU27.shp")) +# Test time: for ~9mio points we need ~3 min +# start: "2021-01-25 21:58:05 CET" +# end: "2021-01-25 22:00:53 CET" + +## Calculate the summary statistic + + +# HWSD Intersection ------------------------------------------------------ +gaul_unique_EU27 = read_sf((here("Input_data", "gaul_uniq_EU27.shp"))) +SOC_topsoil_SOCperc_HWSD_Q= st_read(here("Input_data","SOC_topsoil_SOCperc_HWSD_Q.shp"), + agr = c(SOC = "constant")) +intersection_SOC_gaulLeft_EU27_HWSD = st_intersection(st_make_valid(gaul_unique_EU27), st_make_valid(SOC_topsoil_SOCperc_HWSD_Q)) +# area +SOC_HWSD_intersection = intersection_SOC_gaulLeft_EU27_HWSD %>% + rowwise() %>% + mutate(area = geometry %>% + st_zm() %>% + st_area()) + +# 1- Define new factors peat nonpeat based on SOC condition +SOC_prop_HWSD_insectn = intersection_SOC_gaulLeft_EU27_HWSD %>% + rowwise() %>% + mutate(peat = ifelse(SOC >= 30, "peat", "non_peat")) #rowwise assigning peat non_peat as grouping variabkle + +# aggregate to adm1 level https://r-spatial.github.io/sf/reference/aggregate.sf.html +SOC_prop_HWSD_insectn_adm1 = SOC_prop_HWSD_insectn %>% + aggregate(x= .$SOC, by = list(ADM0_NAME, ADM1_NAME, peat), FUN = "mean") + + group_by(ADM1_NAME, peat) + + +#%>% + mutate(area_adm1 = sum(area)) %>% + ungroup() %>% + distinct_at(.vars = 4) + + + + +# PEAT with zonal statistics --------------------------------------------- +# https://stackoverflow.com/questions/47691200/raster-in-r-create-zonal-count-of-specific-cell-values-without-reclassification + +# +# I would like to know if there is way to create zonal statistics for RasterLayerObjects, specifically the count of a given cell value (e.g. a land-use class) in R without having to reclassify the whole raster. The solution should be memory efficient in order to work on large raster files i.e. no extraction of the values into a matrix in R is desired. +# +# Below an example of how I handle it until now. In this case I reclassify the original raster to hold only 1 for the value of interest and missings for all other values. +# +# My proposed solution creates both, redundant data and additional processing steps to get me to my initial goal. I thought something like zonal(r1[r1==6],r2,"count") would work but obviously it does not (see below). + +# generate reproducible Raster +library("raster") + +## RASTER 1 (e.g. 
land-use classes) +r1 <- raster( crs="+proj=utm +zone=31") +extent(r1) <- extent(0, 100, 0, 100) +res(r1) <- c(5, 5) +values(r1) <- sample(10, ncell(r1), replace=TRUE) +plot(r1) + +## RASTER 2 (containing zones of interest) +r2 <- raster( crs="+proj=utm +zone=31") +extent(r2) <- extent(0, 100, 0, 100) +res(r2) <- c(5, 5) +values(r2) <- c(rep(1,100),rep(2,100),rep(3,100),rep(4,100)) +plot(r2) + +# (1) ZONAL STATISTICS +# a. how many cells per zone (independent of specific cell value) +zonal(r1,r2,"count") + +# b. how many cells per zone of specific value 6 +zonal(r1[r1==6],r2,"count") +# -> fails + +# with reclassification +r1.reclass<- + reclassify(r1, + matrix(c(1,5,NA, + 5.5,6.5,1, #class of interest + 6.5,10,NA), + ncol=3, + byrow = T), + include.lowest=T # include the lowest value from the table. + ) +zonal(r1.reclass,r2,"count") + + diff --git a/SPAM_Data_Transformation/SPAM2010_aggregationADM1.R b/SPAM_Data_Transformation/SPAM2010_aggregationADM1.R new file mode 100644 index 0000000000000000000000000000000000000000..884fce7276b92a27ceb51d3e30f6948cc6d18d7d --- /dev/null +++ b/SPAM_Data_Transformation/SPAM2010_aggregationADM1.R @@ -0,0 +1,818 @@ +easypackages::packages(c("tidyverse", 'sf', 'raster','sp','dplyr','spatialEco', + 'exactextractr','tibble', 'here', "readr", + "readxl", "foreign", "data.table", "rbenchmark", "benchmarkme", + "validate", "plyr")) + +# Set working directory +setwd(here::here("Input_data")) + +# Load data ---------------------------------------------------------------- +# Yields ------------------------------------------------------------------ +dat_TA_yld <- read.csv("spam2010V2r0_global_Y_TA.csv", locale = locale(encoding = "LATIN2"))# Yields - all technologies together, ie complete crop - Data downloaded on 1. of September 2020 +dat_TI_yld <- read_csv("spam2010V2r0_global_Y_TI.csv", locale = locale(encoding = "LATIN2"))# Yields - Iirrigated portion of crop +dat_TH_yld <- read_csv("spam2010V2r0_global_Y_TH.csv", locale = locale(encoding = "LATIN2"))# Yields - rainfed high inputs portion of crop +dat_TL_yld <- read_csv("spam2010V2r0_global_Y_TL.csv", locale = locale(encoding = "LATIN2"))# Yields - rainfed low inputs portion of crop +dat_TS_yld <- read_csv("spam2010V2r0_global_Y_TS.csv",locale = locale(encoding = "LATIN2"))# Yields - rainfed subsistence portion of crop +dat_TR_yld <- read_csv("spam2010V2r0_global_Y_TR.csv", locale = locale(encoding = "LATIN2"))# Yields - rainfed portion of crop (= TA - TI, or TH + TL + TS) + +# Production +dat_TA_pr <- read_csv("spam2010V2r0_global_P_TA.csv",locale = locale(encoding = "LATIN2")) # LATIN2 encoding is used for proper display of nordic and slavic names +dat_TI_pr <- read_csv("spam2010V2r0_global_P_TI.csv",locale = locale(encoding = "LATIN2")) +dat_TH_pr <- read_csv("spam2010V2r0_global_P_TH.csv",locale = locale(encoding = "LATIN2")) +dat_TL_pr <- read_csv("spam2010V2r0_global_P_TL.csv",locale = locale(encoding = "LATIN2")) +dat_TS_pr <- read_csv("spam2010V2r0_global_P_TS.csv",locale = locale(encoding = "LATIN2")) +dat_TR_pr <- read_csv("spam2010V2r0_global_P_TR.csv",locale = locale(encoding = "LATIN2")) + +# Physical area-------------------------------- +dat_TA_pa <- read_csv("spam2010V2r0_global_A_TA.csv",locale = locale(encoding = "LATIN2")) +dat_TI_pa <- read_csv("spam2010V2r0_global_A_TI.csv",locale = locale(encoding = "LATIN2")) +dat_TH_pa <- read_csv("spam2010V2r0_global_A_TH.csv",locale = locale(encoding = "LATIN2")) +dat_TL_pa <- read_csv("spam2010V2r0_global_A_TL.csv",locale = locale(encoding = 
"LATIN2")) +dat_TS_pa <- read_csv("spam2010V2r0_global_A_TS.csv",locale = locale(encoding = "LATIN2")) +dat_TR_pa <- read_csv("spam2010V2r0_global_A_TR.csv",locale = locale(encoding = "LATIN2")) + +# Harvesting area---------------------------------- +dat_TA_ha <- read_csv("spam2010V2r0_global_H_TA.csv",locale = locale(encoding = "LATIN2")) +dat_TI_ha <- read_csv("spam2010V2r0_global_H_TI.csv",locale = locale(encoding = "LATIN2")) +dat_TH_ha <- read_csv("spam2010V2r0_global_H_TH.csv",locale = locale(encoding = "LATIN2")) +dat_TL_ha <- read_csv("spam2010V2r0_global_H_TL.csv",locale = locale(encoding = "LATIN2")) +dat_TS_ha <- read_csv("spam2010V2r0_global_H_TS.csv",locale = locale(encoding = "LATIN2")) +dat_TR_ha <- read_csv("spam2010V2r0_global_H_TR.csv",locale = locale(encoding = "LATIN2")) + +# Create a spatial SPAM file +geo_spam=dplyr::select(dat_TA_pr, x,y, cell5m, name_adm1, iso3) +# %>% + # distinct_at(vars(name_adm1)) + +# Make SPAM spatial +# projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0" +# proj_mercator = "+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs" +proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below +proj_meter = "+datum=WGS84 +units=m +no_defs" +geo_spam1 <- st_as_sf(x = geo_spam, + coords = c("x", "y"), + crs = proj) + +# Load administrative unit shapefile which matches the SPAM adm1 zones +Admin0 = st_read(here::here("Input_data","GAUL_GADM_SPAM_ULRIKE", "g2008_0_now","g2008_0.shp")) +Admin1 = st_read(here::here("Input_data","GAUL_GADM_SPAM_ULRIKE", "g2008_1_now","g2008_1.shp")) #this is what we need +Admin2 = st_read(here::here("Input_data","GAUL_GADM_SPAM_ULRIKE", "g2008_2_2020_11_02","g2008_2.shp")) #this is what we need + +# Here I manually adjusted the NAME1 values to the once used in SPAM2010. see Documentation for more. +Admin1Adj = st_read(here::here("Input_data","g2008_1_now", "g2008_1.shp"), options = "ENCODING=LATIN2")#WINDOWS-1252 + +# Now the values from GGS that are still not matching the SPAM2010 adm1 values are recoded +Admin1Ad2 = Admin1Adj %>% as_tibble() %>% + mutate(NAME1 = case_when(ADM0_CODE == 196 & #Philipines + NAME1 =="Autonomous region in Muslim Mindanao (ARMM)"~ "Autonomous region in Muslim Mi", + TRUE ~ NAME1)) %>% + mutate(NAME1 = case_when(ADM0_CODE == 196 & #Philipines + NAME1 =="Region IX (Zamboanga Peninsula)" ~ "Region IX (Zamboanga Peninsula", + TRUE ~ NAME1)) %>% + mutate(NAME1 = case_when(ADM0_CODE == 19 & #Azerbaijan + NAME1 =="Administrative unit not available" ~ "Administrative unit not availa", + TRUE ~ NAME1)) %>% + mutate(NAME1 = case_when(ADM0_CODE == 152 & #Malawi + NAME1 =="Area under National Administration" ~ "Area under National Administra", + TRUE ~ NAME1)) %>% + mutate(NAME1 = case_when(ADM0_CODE == 196 & #Philipines + NAME1 =="Cordillera Administrative region (CAR)" ~ "Cordillera Administrative regi", + TRUE ~ NAME1)) %>% + mutate(NAME1 = case_when(ADM0_CODE == 196 & #Philipines + NAME1 =="Autonomous region in Muslim Mindanao (ARMM)"~ "Autonomous region in Muslim Mi", + TRUE ~ NAME1)) %>% + mutate(NAME1 = case_when(ADM0_CODE == 108 & #Haiti + NAME1 =="NOROESTE" ~ "Nord-Est", + TRUE ~ NAME1)) %>% + mutate(NAME1 = case_when(ADM0_CODE == 111 & # Honduras + NAME1 =="Nor Oriental" ~ "Gracias A Dios", + TRUE ~ NAME1)) + +# issue1: Some country like cote d ivoire are still wrongly written. Check out windows decoding. +# Issue2: Check out also where to combine the arable land. 
+ +# ENCODING=WINDOWS-1252 +# Define projection for GAULGADMSPAM +proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below +proj_meter = "+datum=WGS84 +units=m +no_defs" +Admin1 <- st_as_sf(x = Admin1Adj, + # coords = c("x", "y"), + crs = proj) + +# Joining SPAM df and GAUL-GADM-SPAM -------------------------------------- + +# Ulrike's comment: Shape files for GAUL-GADM-SPAM +# Please note that the columns fips0, fips1 and fips2 are the ones which have the codes for admin level 0 (country), level 1 and level 2 which can be linked to SPAM results. +# The matching names are in columns Adm0_name, name1 and name2.# +# Columns which have names O_fips1, O_fips2, O_name1 and O_name2 refer to an “old” classification system at level 1 and level 2. Its entries differ from the columns without “O_” in some EU countries and a few others where the statistic units did not match the admin units. Please note that the shape files themselves were not modified, only the entries in the dbf tables. + +# adm1 unit match check +spam_admin_link = + dat_TA_pr%>% dplyr::select(iso3, cell5m, name_cntr, name_adm1) %>% + distinct_at(vars(name_adm1), .keep_all = T) #%>% + full_join(., Admin1Adj, by=c("name_adm1"="NAME1")) + +adm1_distinct=Admin1Adj %>% as_tibble() %>% + distinct_at(vars(NAME1), .keep_all = T) +str(Admin1) + +adm1_distinct=as_tibble(Admin1) %>% + distinct_at(vars(FIPS1))#, .keep_all = T) + +# adm2 units +adm2o_distinct=as_tibble(Admin1) %>% + distinct_at(vars(O_NAME))#, .keep_all = T) + +adm2_distinct=as_tibble(Admin1) %>% + distinct_at(vars(FIPS1))#, .keep_all = T) + +# Validation - Making sure SPAM2010 matches with admin Shapefiles + +# Adm0 - unique countries +# SPAM2010 +# 201 (name_cntr) +# 201 (iso3) + +# ADM1 GAUL_SPAM_GADM +# > 275 (ADM0_NAME) +# > 250 (FIPS0) +# + +# ADM1 +# 1. Same amount of rows when distinct adm1 names? + +# SPAM2010 Admin1 units +# 2493 (name_adm1) + +# ADM1 GAUL_SPAM_GADM +# 3230 (NAME1) +# 3079 (O_NAME1 +# 2898 (FIPS1) + +# ADM2 GAUL_SPAM_GADM +# 3080 (O_NAME1) +# 2758 (NAME1) +# 3222 (O_FIPS1) +# 2898 (FIPS1) + + +# How to deal with inconsistency? ----------------------------------------- +# 1. Spatial join +# 2. Fulljoin > detect NA's then check what is different + +# Spatial join +join_SPAM_adm1 = st_join(Admin1, geo_spam1, join = st_intersects, largest = T) + +# the final adm1 units are calculated by first full join and then by removing the NAs (=all variables that are not +# in the SPAM2010 and some that are not represented in the GGS (no shapes available for England subunits for example)). +# Finally, we have now 2468 adm1 units globally (of initially 2493 adm1 units; Diff: 25 adm1 units). The area of these +# 25 adm1 units needs to be added to the representative units. Otherwise, the total arable land will be off. TODO! + +# Disinct GGS adjusted +adm1_distinctAdj=Admin1Adj2 %>% as_tibble() %>% + distinct_at(vars(NAME1))#, .keep_all = T) +#2,741 rows + +# Distinct SPAM2010 +spam_admin_link = dat_TA_pr %>% dplyr::select(iso3, cell5m, name_cntr, name_adm1) %>% + distinct_at(vars(name_adm1))#, .keep_all = T) %>% +#2,493 (diff=248) + + +# Left joining the SPAM2010 and GGS adjusted. 
The result does not yet match in terms of rows
+# (note: adm1_distinctAdj above needs .keep_all = TRUE so that ADM1_CODE and the FIPS columns survive for the select below)
+FINAL_adm1_spam2010_GGS = dat_TA_pr %>% dplyr::select(iso3, cell5m, name_cntr, name_adm1) %>%
+  distinct_at(vars(name_adm1), .keep_all = T) %>% # this pipe was commented out in the draft, which orphaned the joins below
+  left_join(., adm1_distinctAdj, by = c("name_adm1" = "NAME1")) %>%
+  dplyr::select(iso3, cell5m, name_adm1, ADM1_CODE, FIPS1, FIPS0) #%>%
+  # drop_na()
+
+write_csv(FINAL_adm1_spam2010_GGS, here::here("Input_data", "test67.csv"))
+
+# Difference between "FINAL_adm1_spam2010_GGS" (2,493 rows) and "spam_admin_link" (2,468 rows):
+# the difference of 25 rows is due to 8 adm1 districts that exist in SPAM2010 but not in the GGS dataset.
+# Spelling differences could be a further reason.
+
+# With aggregation, long format -----------------------------------------------
+ChangeNames <- function(x) { # renaming function: SPAM columns 10-51 hold the 42 crops
+  crp <- c('whea', 'rice', 'maiz', 'barl', 'pmil', 'smil', 'sorg', 'ocer', 'pota', 'swpo', 'yams', 'cass', 'orts', 'bean', 'chic', 'cowp', 'pige', 'lent', 'opul', 'soyb',
+           'grou', 'cnut', 'oilp', 'sunf', 'rape', 'sesa', 'ooil', 'sugc', 'sugb', 'cott', 'ofib', 'acof', 'rcof', 'coco', 'teas', 'toba', 'bana', 'plnt', 'trof', 'temf', 'vege', 'rest')
+  names(x)[10:51] <- crp
+  return(x)
+}
+
+GatherFun <- function(k){
+  k %>% gather('crop', 'valu', whea:rest)
+}
+
+# AggregateFun_mean <- function(z){ # for yield
+#   ddply(z, .(name_adm1, crop),
+#         function(z) mean(z$valu[z$valu != 0]))
+# }
+
+AggregateFun_sum <- function(g){ # for area
+  g %>% dplyr::select(iso3, name_adm1, cell5m, rec_type, tech_type, crop, unit, valu) %>%
+    group_by(iso3, name_adm1, crop) %>%
+    mutate(valu = sum(valu)) %>%
+    distinct_at(vars(name_adm1), .keep_all = T)
+}
+
+CollapseFun <- function(h){
+  h %>% dplyr::distinct_at(vars('name_adm1', 'crop'), .keep_all = TRUE)
+}
+
+CombineFun <- function(d, s){
+  full_join(d, s, by = c("name_adm1", 'crop'))
+}
+
+CleanFun <- function(l){
+  l %>%
+    dplyr::rename(value = V1) %>% # V1 is the ddply output column; only needed after AggregateFun_mean
+    dplyr::select(iso3, name_cntr, name_adm1, cell5m,
+                  rec_type, tech_type, crop, unit, valu)
+}
+
+# TransformFun_yld <- function(d){
+#   p <- ChangeNames(d)
+#   l <- GatherFun(p)
+#   r <- AggregateFun_mean(l)
+#   y <- CollapseFun(l)
+#   u <- CombineFun(r, y)
+#   j <- CleanFun(p)
+# }
+
+# This wrapper was left unrolled in the draft, so TransformFun_area() was never
+# actually defined and the calls below failed. AggregateFun_sum() already keeps
+# one row per adm1 unit and crop, so the Collapse/Combine/Clean steps of the
+# yield variant are skipped here.
+TransformFun_area <- function(d){
+  p <- ChangeNames(d)
+  l <- GatherFun(p)
+  AggregateFun_sum(l)
+}
+
+# Applying transformation to production
+TA_prd <- TransformFun_area(dat_TA_pr)
+TH_prd <- TransformFun_area(dat_TH_pr)
+TI_prd <- TransformFun_area(dat_TI_pr)
+TL_prd <- TransformFun_area(dat_TL_pr)
+TR_prd <- TransformFun_area(dat_TR_pr)
+TS_prd <- TransformFun_area(dat_TS_pr)
+lst.prd <- list(TA_prd, TH_prd, TI_prd, TL_prd, TR_prd, TS_prd)
+dat_prd <- do.call("rbind", lst.prd) # merging all tech types
+write_csv(dat_prd, here::here("Input_data", "production_all_rec_tech_adm1.csv"))
+
+# Applying transformation to physical area tech types
+TA_phy <- TransformFun_area(dat_TA_pa)
+TH_phy <- TransformFun_area(dat_TH_pa)
+TI_phy <- TransformFun_area(dat_TI_pa)
+TL_phy <- TransformFun_area(dat_TL_pa)
+TR_phy <- TransformFun_area(dat_TR_pa)
+TS_phy <- TransformFun_area(dat_TS_pa)
+lst.phy <- list(TA_phy, TH_phy, TI_phy, TL_phy, TR_phy, TS_phy)
+dat_phy <- do.call("rbind", lst.phy) # merging all tech types
+write_csv(dat_phy, here::here("Input_data", "physicalarea_all_rec_tech_adm1.csv"))
+
+# Applying transformation to harvested area tech types
+TA_har <- TransformFun_area(dat_TA_ha) +TH_har <- TransformFun_area(dat_TH_ha) +TI_har <- TransformFun_area(dat_TI_ha) +TL_har <- TransformFun_area(dat_TL_ha) +TR_har <- TransformFun_area(dat_TR_ha) +TS_har <- TransformFun_area(dat_TS_ha) +lst.har <- list(TA_har, TH_har, TI_har, TL_har, TR_har, TS_har) +dat_har <- do.call("rbind", lst.har) # merging all tech types +write_csv(dat_har, here::here("Input_data", "harvestedarea_all_rec_tech_adm1.csv")) + +# Validating aggregation ------------------------------------------------- +# 1. Sum up adm1 units and compare if you get same aggregation results +# 2. Do this for all rec and tech types +dat_har %>% as_tibble() +dat_phy %>% as_tibble() +dat_prd %>% as_tibble() + + + + + + + + +# Independent -- Merging treenuts with SPAM ------------------------------- +# LOAD data from previous step (due to memory overuse) +# setwd(here('Input_data', "Rectypes_full_cropspread")) +# yld_dat <- read_csv(here::here("Input_data", "harvestedarea_all_rec_tech_adm1.csv") +prd_dat <- read_csv(here::here("Input_data", "production_all_rec_tech_adm1.csv")) +har_dat <- read_csv(here::here("Input_data", "harvestedarea_all_rec_tech_adm1.csv")) +phy_dat <- read_csv(here::here("Input_data", "physicalarea_all_rec_tech_adm1.csv")) + + +# TREENUTS WITH GADM ------------------------------------------------------ +# # Load and check shapefile +# wd=setwd(here("Input_data")) +# sph.prod = read_sf(dsn = wd, layer = "GADM_treenut_rest_pd") +# sph.harv = read_sf(dsn = wd, layer = "GADM_treenut_rest_ha") +# sph.all = st_join(sph.prod, sph.harv, join = st_within) +# # write_sf(sph.all, dsn = here("Input_data", driver = "ESRI shapefile")) +# sph.prod.tib = as_tibble(sph.prod) +# sph.prod.tib = dplyr::select(sph.prod.tib, UID, GID_0, GID_1, trnt_r_) +# +# sph.harv.tib = as_tibble(sph.harv) +# sph.harv.tib = dplyr::select(sph.harv.tib, UID, GID_0, GID_1, trnt_r_) +# +# # loading SPAM-GADM join +# spam.gadm.link = read_sf(dsn = here("Input_data", "GADMtoSPAM", "GADM_to_SPAM_within"), +# layer = "GADM_to_SPAM_within") +# # +# # Treenuts GADM pixel --------------------------------------------------------------- +# # CAUTION: THESE FILES ARE WRONG! THE RATIO IS WRONGLY CALULATED (not grouped by adm1 and adm0 level) +# # Treenuts are loaded from the treenuts script +# # Production +# # rbenchmark::benchmark( +# spam.gadm.link %>% +# as_tibble() %>% +# left_join(.,sph.prod.tib, by="UID") %>% +# left_join(.,prd_dat, by="alloc_key") %>% +# mutate(tnuts = trnt_r_*rest) %>% +# mutate(rest.1 = (1-trnt_r_)*rest) %>% +# dplyr::select(alloc_key, UID, GID_0.x, GID_1.x, rec_type, tech_type, unit, +# whea:rest, tnuts, rest.1) %>% +# write.csv(here("Input_data", "prod_tnuts_spam_gadm.csv")) +# +# # +# # c=read_csv(here("Input_data", "prod_tnuts_spam_gadm.csv")) +# # +# # Approach validation > do the numbers make sense? 
+# # xy=dat_join_prod %>% +# # group_by(tnuts) %>% +# # top_n(n=10) %>% +# # dplyr::select(rest, tnuts, rest.1) +# +# # write_csv(dat_join_prod, here("Input_data", "dat_treenut_prod.csv")) +# # st_as_sf() +# +# # Harvested area +# # rbenchmark::benchmark( +# spam.gadm.link %>% +# as_tibble() %>% +# left_join(., har_dat, by="alloc_key") %>% +# left_join(.,sph.harv.tib, by="UID") %>% +# mutate(tnuts = trnt_r_*rest) %>% +# mutate(rest.1 = (1-trnt_r_)*rest) %>% +# dplyr::select(alloc_key, UID, GID_0.x, GID_1.x, rec_type, tech_type, unit, +# whea:rest, tnuts, rest.1) %>% +# write.csv(here("Input_data", "harv_tnuts_spam_gadm")) +# # ) +# +# # Physical land +# # rbenchmark::benchmark( +# spam.gadm.link %>% +# as_tibble() %>% +# left_join(., phy_dat, by="alloc_key") %>% +# left_join(.,sph.harv.tib, by="UID") %>% +# mutate(tnuts = trnt_r_*rest) %>% +# mutate(rest.1 = (1-trnt_r_)*rest) %>% +# dplyr::select(alloc_key, UID, GID_0.x, GID_1.x, rec_type, tech_type, unit, +# whea:rest, tnuts, rest.1) %>% +# write.csv(here("Input_data", "phys_tnuts_spam_gadm")) +# +# # Approach: Aim is to get dinal datasets on pixel level > Adm1 level +# 1_Cbind the three datasets +# 2_Gather crops +# 3_Spread levels rec type +# 4_Calculate yields after unit transformation + +# # 1. Load data +# # Global +# dat.phys = read_csv(here("Input_data", "phys_tnuts_spam_gadm.csv")) +# dat.prod = read_csv(here("Input_data", "prod_tnuts_spam_gadm,csv")) +# dat.harv = read_csv(here("Input_data", "harv_tnuts_spam_gadm.csv")) +# +# +# # lst.tnuts = list(dat.harv, dat.phys, dat.prod) +# dat_nutsall <- do.call("rbind", list(dat.harv, dat.phys, dat.prod)) +# +# # GLobal - Issue: Do this with the new laptop! +# # Principle of this function is to not generate any variable as this can lead to overuse of RAM. +# do.call("rbind", list(read_csv(here("Input_data", "phys_tnuts_spam_gadm")), +# read_csv(here("Input_data", "prod_tnuts_spam_gadm")), +# read_csv(here("Input_data", "harv_tnuts_spam_gadm")))) %>% +# dplyr::select(-c(rest, X1)) %>% +# pivot_longer(names_to = "crops", values_to = "value", cols = whea:rest.1) %>% +# pivot_wider(-unit, names_from = "rec_type", +# values_from = "value" ) %>% +# mutate(Y = (P*1000/H))%>% #checked for result. We are transforming mt to tons when multiplying Producction by 1000 +# mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% +# rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% +# write.csv(here("Input_data", "pxl_rec_tec_gadm_spam_tnuts")) + +# # SUBSETTING FOR EU-27 GADM ---------------------------------------------------------------------- +# EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA", +# "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR") +# write.csv(EU_iso3, here("Input_data", "EU27_iso3_lst.csv")) +# length(EU_iso3) #27 member states > the list comes from gams +# +# # Check if the EU countries iso3 are all present in dat.phys etc. 
+# EU_iso3 %in% dat.phys$GID_0.x #Validation +# dat.phys.EU = read_csv(here("Input_data", "phys_tnuts_spam_gadm")) +# filter(dat.phys, GID_0.x %in% EU_iso3) +# +# dat.prod.EU = read_csv(here("Input_data", "prod_tnuts_spam_gadm")) %>% +# filter(dat.prod, GID_0.x %in% EU_iso3) +# +# dat.harv.EU = read_csv(here("Input_data", "harv_tnuts_spam_gadm")) %>% +# dat.harv.EU = filter(dat.harv, GID_0.x %in% EU_iso3) +# +# +# do.call("rbind", list(read_csv(here("Input_data", "phys_tnuts_spam_gadm")), +# read_csv(here("Input_data", "prod_tnuts_spam_gadm")), +# read_csv(here("Input_data", "harv_tnuts_spam_gadm")))) %>% +# filter(GID_0.x %in% EU_iso3) %>% +# dplyr::select(-c(rest, X1)) %>% +# pivot_longer(names_to = "crops", values_to = "value", cols = whea:rest.1) %>% +# pivot_wider(-unit, names_from = "rec_type", +# values_from = "value" ) %>% +# mutate(Y = (P*1000/H))%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here =300kg/ha) convert mt to kg Issue: Weird as according to the EU stats page aMegatonne, abbreviated as Mt, is a metric unit equivalent to 1 million (106) tonnes, or 1 billion (109) kilograms. +# mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% +# rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% +# write.csv(here("Input_data", "pxl_rec_tec_gadm_spam_tnuts_EU27")) +# +# # Testing the approach +# { +# # cbind dfs - testing subset +# fil.prod = dat.prod %>% top_n(n=20) +# fil.harv = dat.harv %>% top_n(n=20) +# fil.phys = dat.phys %>% top_n(n=20) +# +# lst.tnuts = list(fil.harv, fil.phys, fil.prod) +# do.call("rbind", list(fil.harv, fil.phys, fil.prod)) +# +# long.dat.nuts <- do.call("rbind", list(fil.harv, fil.phys, fil.prod)) %>% +# dplyr::select(-c(X1)) %>% +# pivot_longer(names_to = "crops", values_to = "value", cols = whea:tnuts) %>% +# pivot_wider(-unit, names_from = "rec_type", +# values_from = "value" ) %>% +# mutate(Y = (P*1000/H))%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here =300kg/ha) convert mt to kg Issue: Weird as according to the EU stats page aMegatonne, abbreviated as Mt, is a metric unit equivalent to 1 million (106) tonnes, or 1 billion (109) kilograms. +# mutate_at(vars(Y), ~replace(., is.nan(.), 0)) #%>% +# # rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% +# write.csv(here("Input_data", "test_rbind_fil")) +# } + + + +# SUBSETTING FOR EU-27 GAUL ---------------------------------------------------------------------- +EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA", + "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR") +# write.csv(EU_iso3, here("Input_data", "EU27_iso3_lst.csv")) +length(EU_iso3) #27 member states > the list comes from gams + +# Check if the EU countries iso3 are all present in dat.phys etc. 
+# Check if all EU27 iso3 codes are present in the dfs below
+# EU_iso3 %in% dat.phys$iso3 #Validation
+dat.phys.EU = read_csv(here("Input_data", "phys_tnuts_rest_spam_gaul_FINAL.csv")) %>% # Pixel level data
+  filter(iso3 %in% EU_iso3) %>%
+  mutate(alloc_key = str_sub(alloc_key, start=2)) %>%
+  dplyr::select(-c(X1_1, X1))
+
+dat.prod.EU = read_csv(here("Input_data", "prod_tnuts_rest_spam_gaul_FINAL1.csv")) %>%
+  filter(iso3 %in% EU_iso3) %>%
+  mutate(alloc_key = str_sub(alloc_key, start=2)) %>%
+  dplyr::select(-c(X1))
+
+dat.harv.EU = read_csv(here("Input_data", "harv_tnuts_rest_spam_gaul_FINAL.csv")) %>%
+  filter(iso3 %in% EU_iso3) %>%
+  mutate(alloc_key = str_sub(alloc_key, start=2)) %>%
+  dplyr::select(-c(X1_1, X1))
+
+dat.all.EU = do.call("rbind", list(dat.phys.EU, dat.prod.EU, dat.harv.EU)) %>%
+  # filter(iso3 %in% EU_iso3) %>%
+  # dplyr::select(-c(X1, X1_1)) %>%
+  pivot_longer(names_to = "crops", values_to = "value", cols = whea:tnuts) %>%
+  pivot_wider(-unit, names_from = "rec_type",
+              values_from = "value") %>%
+  mutate(Y = (P*1000/H)) %>% # P is in metric tonnes (SPAM's "mt" = metric tonne, not Eurostat's megatonne), so *1000 gives kg and Y is kg/ha. Checked against the original SPAM yield sheet (285 kg/ha there vs. 300 kg/ha here).
+  mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>%
+  # rename(GID_0 = GID_0.x, GID_1 = GID_1.x) %>%
+  write_csv(here("Input_data", "pxl_rec_tec_gaul_spam_tnuts_EU_27_corr_prod.csv")) # IMPORTANT dataset. Pixel level. Tnuts.
+dat.all.EU = read_csv(here("Input_data", "pxl_rec_tec_gaul_spam_tnuts_EU_27_corr_prod.csv"))
+
+# Testing the approach
+# {
+#   # rbind dfs - testing subset
+#   fil.prod = dat.prod %>% top_n(n=20)
+#   fil.harv = dat.harv %>% top_n(n=20)
+#   fil.phys = dat.phys %>% top_n(n=20)
+#
+#   lst.tnuts = list(fil.harv, fil.phys, fil.prod)
+#   x = do.call("rbind", list(fil.harv, fil.phys, fil.prod))
+#
+#   long.dat.nuts <- do.call("rbind", list(fil.harv, fil.phys, fil.prod)) %>%
+#     dplyr::select(-c(X1)) %>%
+#     pivot_longer(names_to = "crops", values_to = "value", cols = whea:tnuts) %>%
+#     pivot_wider(-unit, names_from = "rec_type",
+#                 values_from = "value") %>%
+#     mutate(Y = (P*1000/H)) %>% # metric tonnes * 1000 = kg, so Y is kg/ha
+#     mutate_at(vars(Y), ~replace(., is.nan(.), 0)) # %>%
+#     # rename(GID_0 = GID_0.x, GID_1 = GID_1.x) %>%
+#     # write.csv(here("Input_data", "test_rbind_fil"))
+# }
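+# Hedged sketch of a key-coverage check before trusting the stacked dat.all.EU:
+# the three record-type tables above should describe the same pixels, so the
+# distinct alloc_key counts should agree and the anti_joins should be empty.
+dplyr::n_distinct(dat.phys.EU$alloc_key)
+dplyr::n_distinct(dat.prod.EU$alloc_key)
+dplyr::n_distinct(dat.harv.EU$alloc_key)
+anti_join(dat.phys.EU, dat.harv.EU, by = "alloc_key") # expect 0 rows
+anti_join(dat.harv.EU, dat.phys.EU, by = "alloc_key") # expect 0 rows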
+# Crop name mapping -------------------------------------------------------
+# SPAM mapping
+nam_cat <- read_csv(here("Input_data", 'crop_names.csv')) # Loading crop name sheets - mapping SPAM -- FAO (FROM SPAM)
+nam_cat <- nam_cat %>% mutate(FAOCODE = as.double(FAOCODE)) %>%
+  rename(No_spam = names(nam_cat)[1])
+
+GAEZ_SPAM_crp = read_csv(here("Input_data", "6-GAEZ-and-SPAM-crops-correspondence-2015-02-26.csv")) # Mapping GAEZ and SPAM (from SPAM)
+GAEZ_SPAM_crp = GAEZ_SPAM_crp %>%
+  rename(No_spam = names(.)[4]) %>%
+  dplyr::select('SPAM crop long name', 'GAEZ crop', No_spam)
+
+# Original crop list from the CIFOS model data
+FAO_CIFOS_crp = read_csv(here("Input_data", "FAO_modeldat_2020.csv"))
+
+# FAO spreadsheets
+FAO_stat_control = read_csv(here("Input_data", "FAOSTAT_data_12-26-2020-1.csv"))
+food_cat <- read_csv(here("Input_data", 'foodnonfood.csv')) # loading food usage sheet
+food_cat = food_cat %>%
+  rename(`SPAM short name` = Crop_short)
+FAOcntr <- read_csv(here("Input_data", 'FAO_countries.csv')) # loading FAO country codes
+FAOcntr <- dplyr::select(FAOcntr, 'ISO3', 'FAOSTAT_CODE', "GAUL_CODE", 'Continent', 'Subcontinent')
+
+# The (still incomplete) mapping of SPAM, GAEZ, FAOSTAT and FAO_CIFOS names is assembled here.
+# Not all crops are matched perfectly; the remainder is fixed by hand in Excel (export here, re-import a chunk below).
+model_dat = left_join(FAO_CIFOS_crp, FAO_stat_control, by="crop") %>%
+  left_join(nam_cat, ., by="FAOCODE") %>%
+  left_join(., GAEZ_SPAM_crp, by= "No_spam")
+write.csv(model_dat, here("Input_data", "model_crops_incomplete.csv"))
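+# A small audit sketch for the chained left_joins above (column names taken
+# from the sheets as read; adjust if they differ): rows without a GAEZ partner
+# carry NA in `GAEZ crop` and are exactly the ones to fix by hand in Excel.
+model_dat %>%
+  filter(is.na(`GAEZ crop`)) %>%
+  dplyr::select(No_spam, FAOCODE, `SPAM crop long name`)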
+# This data frame is a complete mapping of GAEZ, SPAM and FAO (FAO original and CIFOS model-data FAO crop names).
+# The FAO crop names differ slightly between the model data and the FAO crops that I extracted from FAOSTAT.
+# "Crop others" were adjusted according to the SPAM indication and the GAEZ-mapped crops (cereal others = oats, etc.)
+
+dat_crp_map <- read_csv(here("Input_data", "model_crops_mapping_GAEZ_SPAM_FAO_complete.csv"))
+# dat_crp_map2 <- read_csv(here("Input_data", "model_crops_incomplete.csv"))
+
+# Validation > Result = Complete match!
+# dat_crp_map2$crop_cifos %in% FAO_CIFOS_crp$crop
+
+dat_crp_map1 = dat_crp_map %>%
+  rename(crop_cifos = crop) %>%
+  left_join(., food_cat) # adding food categories (food/non-food)
+
+dat_crp_map1$Usage[42] = "food" # adding the food category to treenuts
+write.csv(dat_crp_map1, here("Input_data", "crop_name_mapping_complete.csv")) # this file can be used for further mapping of the crops
+dat_crp_map = read_csv(here("Input_data", "crop_name_mapping_complete.csv"))
+
+# Aggregating to ADM1 level EU -----------------------------------------------
+
+# Pxl EU27 data ----------------------------------------------------------
+dat.pxl.EU = read_csv(here("Input_data", "pxl_rec_tec_gaul_spam_tnuts_EU_27_corr_prod.csv"))
+dat_TA_yld = read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv")) %>%
+  dplyr::select(alloc_key, name_cntr, name_adm1, iso3, x, y) %>%
+  mutate(alloc_key = str_sub(alloc_key, 2))
+
+# Mapping countries
+FAOcntr <- read_csv(here("Input_data", 'FAO_countries.csv'))
+FAOcntr <- dplyr::select(FAOcntr, 'ISO3', 'FAOSTAT_CODE', "GAUL_CODE", 'Continent', 'Subcontinent') %>%
+  rename(iso3 = ISO3)
+dat_EU27 = read_csv(here("Input_data", "Country_name_mapping_incomplete.csv")) %>%
+  left_join(., FAOcntr, by="iso3")
+
+dat_crop_nam = read_csv(here("Input_data", "crop_name_mapping_complete.csv")) %>%
+  dplyr::select(-c(X1, X1_1)) %>%
+  rename(crops = 'SPAM short name')
+
+# Transform pixel data so that it has SPAM variables (adm1 level)
+dat.pixel.EU = dat.pxl.EU %>%
+  mutate(alloc_key = as.character(alloc_key)) %>%
+  left_join(., dat_TA_yld, by="alloc_key") %>%
+  dplyr::select(alloc_key, iso3.x, name_cntr.x, name_adm1.x, G2008_1_ID,
+                ADM0_CODE, ADM0_NAME, ADM1_CODE, ADM1_NAME, x, y, crops, tech_type) %>%
+  rename(iso3 = iso3.x,
+         name_cntr = name_cntr.x,
+         name_adm1 = name_adm1.x) %>%
+  left_join(., dat_crop_nam, by="crops") %>%
+  left_join(., dat_EU27, by="iso3") %>%
+  dplyr::select(G2008_1_ID, tech_type, CIFOS_cntr_nam, crop_cifos, crops)
+
+# df with adm1 units and Cifos country names (EU)
+EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA",
+            "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR")
+
+dat_cntr_EU = dat.pxl.EU %>%
+  mutate(alloc_key = as.character(alloc_key)) %>%
+  left_join(., dat_TA_yld, by="alloc_key") %>%
+  dplyr::select(iso3.x, name_adm1.x) %>%
+  dplyr::distinct_at(vars('iso3.x', "name_adm1.x"), .keep_all = F) %>%
+  rename(iso3 = iso3.x,
+         name_adm1 = name_adm1.x) %>%
+  filter(iso3 %in% EU_iso3) %>%
+  left_join(., dat_EU27, by="iso3") %>%
+  dplyr::select(iso3, name_adm1, CIFOS_cntr_nam, Continent, Subcontinent)
+dat_cntr_EU %>% write_csv(here("Input_data", "dat_cntr_EU.csv"))
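+# Consistency sketch for the lookup just written: each iso3 should map to
+# exactly one CIFOS country name; any rows returned here flag a broken
+# country-name mapping.
+dat_cntr_EU %>%
+  group_by(iso3) %>%
+  summarise(n_names = n_distinct(CIFOS_cntr_nam), .groups = "drop") %>%
+  filter(n_names > 1) # expect 0 rows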
+# df with crops and cifos crops
+dat_crop_nam1 = read_csv(here("Input_data", "crop_name_mapping_complete.csv")) %>%
+  dplyr::select('SPAM short name', FAONAMES, crop_cifos) %>%
+  rename(crops = 'SPAM short name')
+
+# Aggregating H, A, P and calculating yields per adm1 level
+dat.adm1.EU = dat.pxl.EU %>%
+  mutate(alloc_key = as.character(alloc_key)) %>%
+  left_join(., dat_TA_yld, by="alloc_key") %>%
+  dplyr::select(iso3.x, name_cntr.x, name_adm1.x, ADM0_NAME, ADM1_NAME, tech_type, crops, A, H, P) %>%
+  group_by(iso3.x, name_adm1.x, tech_type, crops) %>%
+  summarize(across(.cols = A:P, .fns = sum)) %>% # A, H and P (all three rec types) are summed per adm1 and crop
+  mutate(Y = (P*1000/H)) %>% # P is in metric tonnes (SPAM's "mt" = metric tonne, not Eurostat's megatonne), so *1000 gives kg and Y is kg/ha. Checked against the original SPAM yield sheet (285 kg/ha there vs. 300 kg/ha here).
+  mutate_at(vars(Y), ~replace(., is.nan(.), 0))
+
+# This is the sheet that contains the parameters that are important for the GAMS input file.
+# It contains yield, harvested and physical area per country, adm1 unit and crop.
+EU27_crp_adm1 = dat.adm1.EU %>%
+  left_join(., dat_cntr_EU, by=c("name_adm1.x"="name_adm1")) %>%
+  filter(iso3 %in% EU_iso3) %>%
+  left_join(., dat_crop_nam1, by="crops") %>%
+  dplyr::select(iso3, FAONAMES, Subcontinent, Continent, CIFOS_cntr_nam, name_adm1 = name_adm1.x, crop_cifos, A, H, P, Y)
+write.csv(EU27_crp_adm1, here("Input_data", "EU27_crp_adm1.csv")) # Final crop yield/area file.
+
+# Validate
+EU27_crp_adm1 <- read_csv("C:/Users/simon083/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Modified_scripts/R/CifoS_Crop/Input_data/EU27_crp_adm1.csv")
+View(EU27_crp_adm1)
+
+length(unique(EU27_crp_adm1$iso3)) # Correct! 27 countries present.
+anyNA(EU27_crp_adm1) # check for missing values
+# Continue: Why is ALB in the final dataset? Check where this went wrong.
+# Validate yields: sum the adm1 values by hand and check if they match.
+# Check whether this makes sense > 319652 rows?!
+
+# Then add the GADM join again and get values for the SOC% >> add the peat/non-peat layer to it.
+# Add usage to the sheet (or a single sheet)
+
+# Bringing back some SPAM geo-variables to compare them with GADM
+dat.agg.EU = dat_TA_yld %>%
+  dplyr::select(iso3, alloc_key, x, y, name_cntr, name_adm1) %>%
+  right_join(., dat.pxl.EU, by="alloc_key")
+
+dat.agg.EU %>% dplyr::select(iso3)
+# Exploratory (incomplete) aggregation, kept for reference:
+# dat.pxl.EU %>%
+#   # dplyr::select(-Y) %>%
+#   group_by(GID_1) %>%
+#   summarize()
+
+max(dat.pxl.EU$Y)
+
+v <- validator() # empty rule set so far; add rules to check dat.pxl.EU
+summary(v)
+cf <- confront(dat.pxl.EU, v)
+summary(cf)
+?confront
+
+# creating a tidy table with all tech variables in one df
+list.comb <- list(yld_dat, prod_dat, har_dat, phy_dat)
+# list.comb2 = list(dat_har, dat_phy, dat_yld, da) # 'da' is undefined/truncated; left commented out
+dat.comb <- do.call("rbind", list.comb)
+# write.csv(dat.comb, 'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/dat_comb_tidy.csv', row.names=T)
+
+# For ArcGIS - physical area per pixel --------------------------
+setwd(here("Input_data"))
+# Physical area --------------------------------
+dat_TA_pa <- read_csv("spam2010V2r0_global_A_TA.csv")
+dat_TA_pa %>% dplyr::select(alloc_key, x, y) %>% write_csv(here("Input_data", "coordinates_SPAM.csv"))
+
+Phys_area_full_cropspread <- read_csv("Rectypes_full_cropspread/Phys_area_full_cropspread.csv")
+x = Phys_area_full_cropspread %>%
+  dplyr::rowwise() %>%
+  dplyr::mutate(phys_area_pixl = sum(dplyr::c_across(whea:rest), na.rm = T))
+
+x2 = x %>%
+  dplyr::select(alloc_key, x, y, rec_type, tech_type, phys_area_pixl) %>%
+  group_by(alloc_key) %>%
+  mutate(sum_phys_area_pixel = sum(phys_area_pixl))
+
+unique_coord = x2 %>%
+  distinct_at(vars(alloc_key, sum_phys_area_pixel)) %>%
+  left_join(., Phys_area_full_cropspread, by="alloc_key") %>%
+  dplyr::select(alloc_key, x, y, sum_phys_area_pixel) %>%
+  distinct_at(vars(alloc_key, x, y, sum_phys_area_pixel))
+
+write.csv(unique_coord, here("Input_data", "physical_area_per_pixel2.csv"))
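+# The rowwise()/c_across() sum above is correct but slow on a table of this
+# size; an equivalent vectorised sketch of the same per-pixel total with
+# rowSums() over the identical whea:rest columns (kept commented to avoid
+# recomputing):
+# x_fast <- Phys_area_full_cropspread %>%
+#   dplyr::mutate(phys_area_pixl = rowSums(dplyr::across(whea:rest), na.rm = TRUE))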
+# loading crop usage sheet --------------------------------------------------------------
+setwd(here("Input_data"))
+food_cat <- read_csv('foodnonfood.csv')
+# Loading crop name sheets
+nam_cat <- read_csv('crop_names.csv')
+# loading FAO country codes
+FAOcntr <- read_csv(here("Input_data", 'FAO_countries.csv'))
+FAOcntr <- dplyr::select(FAOcntr, 'ISO3', 'FAOSTAT_CODE', "GAUL_CODE", 'Continent', 'Subcontinent')
+
+# joining all together
+names(dat.comb)[8] <- "Crop_short"
+names(nam_cat)[2] <- "Crop_short"
+names(dat.comb)[2] <- "ISO3"
+
+join.use <- left_join(dat.comb, food_cat, by="Crop_short")
+join_FAOcntr <- left_join(join.use, FAOcntr, by="ISO3")
+SPAM_dat_tidy <- left_join(join_FAOcntr, nam_cat, by='Crop_short')
+
+AggregateFun_mean <- function(z){ # for yield; ddply() is from plyr, which is not in the package list above
+  plyr::ddply(z, c("name_adm1", "crop"),
+              function(z) mean(z$valu[z$valu != 0]))
+}
+
+AggregateFun_sum <- function(g){ # for area
+  plyr::ddply(g, c("name_adm1", "crop"),
+              function(g) sum(g$valu))
+}
+
+CollapseFun <- function(h){
+  h %>% dplyr::distinct_at(vars('name_adm1', 'crop'), .keep_all = TRUE)
+}
+
+CombineFun <- function(d,s){
+  full_join(d, s, by=c("name_adm1", 'crop'))
+}
+
+# rename and reorder dataset columns
+Clean2Fun <- function(x){
+  i <- x %>% dplyr::rename(SPAMcrop_short = Crop_short,
+                           SPAMcrop_long = `SPAM long name`,
+                           use = Usage) %>%
+    dplyr::select(-c(`No. crt.`, ID)) %>%
+    arrange(name_adm1, SPAMcrop_short, rec_type, tech_type)
+  c <- i[c('ISO3', 'name_cntr', 'name_adm1', 'alloc_key', 'FAOSTAT_CODE', 'GAUL_CODE', 'SPAMcrop_short', 'Continent', 'Subcontinent', 'SPAMcrop_long', 'FAONAMES', 'FAOCODE', 'GROUP', 'use', 'rec_type', 'tech_type', 'unit', 'valu')]
+}
+SPAM_dat_tidy <- Clean2Fun(SPAM_dat_tidy)
+
+# Save file - Option 1: long dataset
+write.csv(SPAM_dat_tidy, here("Input_data", "Yld_tidy_all.csv"), row.names=T)
+
+# Option 2 - Harvested and physical area horizontal
+SprdFun <- function(x){
+  t <- x %>% dplyr::select(-c(unit)) %>%
+    spread(rec_type, valu) %>%
+    dplyr::rename(yield_kgha = Y,
+                  phys_area_ha = A,
+                  harv_area_ha = H)
+  c <- t[c('Continent', 'Subcontinent', 'ISO3', 'name_cntr', 'name_adm1', 'alloc_key', 'FAOSTAT_CODE', 'GAUL_CODE', 'SPAMcrop_short', 'SPAMcrop_long', 'FAONAMES', 'FAOCODE', 'GROUP', 'use', 'tech_type', 'phys_area_ha', 'harv_area_ha', 'yield_kgha')]
+}
+
+sprd_dat <- SprdFun(SPAM_dat_tidy)
+write.csv(sprd_dat, here("Input_data", "SPAMspread_all_fixd.csv"), row.names=T)
+
+
diff --git a/SPAM_Data_Transformation/SPAM_no_agg_spatial-L0157316.R b/SPAM_Data_Transformation/SPAM_no_agg_spatial-L0157316.R
new file mode 100644
index 0000000000000000000000000000000000000000..286a1fd1974b2d827ab02b5ff19c0efbe2b4455c
--- /dev/null
+++ b/SPAM_Data_Transformation/SPAM_no_agg_spatial-L0157316.R
@@ -0,0 +1,751 @@
+easypackages::packages(c("tidyverse", 'sf', 'raster','sp','dplyr','spatialEco',
+                         'exactextractr','tibble', 'here', "readr",
+                         "readxl", "foreign", "data.table", "rbenchmark", "benchmarkme",
+                         "validate"))
+setwd(here("Input_data"))
+# here::here()
+## Check the current memory limit
+memory.limit()
+
+## Increase the memory limit if needed
+# memory.limit(size=20000)
+gc()
+# Load data ----------------------------------------------------------------
+# Yields ------------------------------------------------------------------
+dat_TA_yld <- read_csv("spam2010V2r0_global_Y_TA.csv") # Yields - all technologies together, i.e. complete crop - Data downloaded on 1.
of September 2020 +dat_TI_yld <- read_csv("spam2010V2r0_global_Y_TI.csv")# Yields - Iirrigated portion of crop +dat_TH_yld <- read_csv("spam2010V2r0_global_Y_TH.csv")# Yields - rainfed high inputs portion of crop +dat_TL_yld <- read_csv("spam2010V2r0_global_Y_TL.csv")# Yields - rainfed low inputs portion of crop +dat_TS_yld <- read_csv("spam2010V2r0_global_Y_TS.csv")# Yields - rainfed subsistence portion of crop +dat_TR_yld <- read_csv("spam2010V2r0_global_Y_TR.csv")# Yields - rainfed portion of crop (= TA - TI, or TH + TL + TS) + + +# Production +dat_TA_pr <- read_csv("spam2010V2r0_global_P_TA.csv") +dat_TI_pr <- read_csv("spam2010V2r0_global_P_TI.csv") +dat_TH_pr <- read_csv("spam2010V2r0_global_P_TH.csv") +dat_TL_pr <- read_csv("spam2010V2r0_global_P_TL.csv") +dat_TS_pr <- read_csv("spam2010V2r0_global_P_TS.csv") +dat_TR_pr <- read_csv("spam2010V2r0_global_P_TR.csv") + +# Physical area-------------------------------- +dat_TA_pa <- read_csv("spam2010V2r0_global_A_TA.csv") +dat_TI_pa <- read_csv("spam2010V2r0_global_A_TI.csv") +dat_TH_pa <- read_csv("spam2010V2r0_global_A_TH.csv") +dat_TL_pa <- read_csv("spam2010V2r0_global_A_TL.csv") +dat_TS_pa <- read_csv("spam2010V2r0_global_A_TS.csv") +dat_TR_pa <- read_csv("spam2010V2r0_global_A_TR.csv") + +# Harvesting area---------------------------------- +dat_TA_ha <- read_csv("spam2010V2r0_global_H_TA.csv") +dat_TI_ha <- read_csv("spam2010V2r0_global_H_TI.csv") +dat_TH_ha <- read_csv("spam2010V2r0_global_H_TH.csv") +dat_TL_ha <- read_csv("spam2010V2r0_global_H_TL.csv") +dat_TS_ha <- read_csv("spam2010V2r0_global_H_TS.csv") +dat_TR_ha <- read_csv("spam2010V2r0_global_H_TR.csv") + +# Functions - Yld --------------------------------------------------------------- +ChangeNames <- function(x) { ## Renaming function + crp <- c('whea', 'rice', 'maiz', 'barl', 'pmil', 'smil', 'sorg', 'ocer', 'pota', 'swpo', 'yams', 'cass', 'orts', 'bean', 'chic', 'cowp', 'pige', 'lent', 'opul', 'soyb', + 'grou', 'cnut', 'oilp', 'sunf', 'rape', 'sesa', 'ooil', 'sugc', 'sugb', 'cott', 'ofib', 'acof', 'rcof', 'coco', 'teas', 'toba', 'bana', 'plnt', 'trof', 'temf', 'vege', 'rest') + names(x)[10:51] <- crp + return(x) +} +GatherFun <- function(k){ + k %>% gather('crop', 'valu', whea:rest) +} +# ------------------------------------------- +# If no aggregartion +AggregateFun_mean <- function(z){ #for yield + ddply(z, .(name_adm1, crop), + function(z) mean(z$valu[z$valu!=0])) +} + +AggregateFun_sum <- function(g){ #for area + ddply(g, .(name_adm1, crop), + function(g) sum(g$valu)) +} + +CollapseFun <- function(h){ + h %>% dplyr::distinct_at(vars('name_adm1', 'crop'), .keep_all = TRUE) +} + +CombineFun <- function(d,s){ + full_join(d,s, by=c("name_adm1", 'crop')) +} +# ------------------------- +CleanFun <- function(l){ + l %>% + # dplyr::rename( + # # value = V1, + # cntr = iso3) %>% + dplyr::select(iso3, name_cntr, name_adm1, name_adm2, alloc_key, x, y, rec_type, tech_type, unit, whea:rest) + } + +TransformFun_yld <- function(d){ + p <- ChangeNames(d) + # l <- GatherFun(p) + # r <- AggregateFun_mean(l) + # y <- CollapseFun(l) + # u <- CombineFun(r,y) + j <- CleanFun(p) +} + +TransformFun_area <- function(d){ + p <- ChangeNames(d) + # l <- GatherFun(p) + # r <- AggregateFun_sum(l) + # y <- CollapseFun(l) + # u <- CombineFun(r,y) + j <- CleanFun(p) +} + + +# With aggregation long ----------------------------------------------------- +ChangeNames <- function(x) { ## Renaming function + crp <- c('whea', 'rice', 'maiz', 'barl', 'pmil', 'smil', 'sorg', 'ocer', 'pota', 'swpo', 
'yams', 'cass', 'orts', 'bean', 'chic', 'cowp', 'pige', 'lent', 'opul', 'soyb', + 'grou', 'cnut', 'oilp', 'sunf', 'rape', 'sesa', 'ooil', 'sugc', 'sugb', 'cott', 'ofib', 'acof', 'rcof', 'coco', 'teas', 'toba', 'bana', 'plnt', 'trof', 'temf', 'vege', 'rest') + names(x)[10:51] <- crp + return(x) +} +GatherFun <- function(k){ + k %>% gather('crop', 'valu', whea:rest) +} + +AggregateFun_mean <- function(z){ #for yield + ddply(z, .(name_adm1, crop), + function(z) mean(z$valu[z$valu!=0])) +} + +AggregateFun_sum <- function(g){ #for area + ddply(g, .(name_adm1, crop), + function(g) sum(g$valu)) +} + +CollapseFun <- function(h){ + h %>% dplyr::distinct_at(vars('name_adm1', 'crop'), .keep_all = TRUE) +} + +CombineFun <- function(d,s){ + full_join(d,s, by=c("name_adm1", 'crop')) +} + +# CleanFun <- function(l){ + u %>% + dplyr::rename(value = V1) #%>% + dplyr::select(iso3, name_cntr, name_adm1, name_adm2, + alloc_key, x, y, rec_type, tech_type, + unit, whea:rest) +} + +TransformFun_yld <- function(d){ + p <- ChangeNames(d) + l <- GatherFun(p) + r <- AggregateFun_mean(l) + y <- CollapseFun(l) + u <- CombineFun(r,y) + j <- CleanFun(p) +} + +# TransformFun_area <- function(d){ + p <- ChangeNames(dat_TA_pd) + l <- GatherFun(p) + r <- AggregateFun_sum(p) + y <- CollapseFun(l) + u <- CombineFun(r,y) + j <- CleanFun(p) +} + + +# With aggregation l spread crops ---------------------------------------- + + + +ChangeNames <- function(x) { ## Renaming function + crp <- c('whea', 'rice', 'maiz', 'barl', 'pmil', 'smil', 'sorg', 'ocer', 'pota', 'swpo', 'yams', 'cass', 'orts', 'bean', 'chic', 'cowp', 'pige', 'lent', 'opul', 'soyb', + 'grou', 'cnut', 'oilp', 'sunf', 'rape', 'sesa', 'ooil', 'sugc', 'sugb', 'cott', 'ofib', 'acof', 'rcof', 'coco', 'teas', 'toba', 'bana', 'plnt', 'trof', 'temf', 'vege', 'rest') + names(x)[10:51] <- crp + return(x) +} + +GatherFun <- function(k){ + k %>% gather('crop', 'valu', whea:rest) +} + +# AggregateFun_mean <- function(z){ #for yield +# ddply(z, .(name_adm1, crop), +# function(z) mean(z$valu[z$valu!=0])) +# } + + +# } + +AggregateFun_sum <- function(g){ #for area + ddply(g, .(name_adm1, crop), + function(g) sum(g$valu)) +} + +CollapseFun <- function(h){ + h %>% dplyr::distinct_at(vars('name_adm1', 'crop'), .keep_all = TRUE) +} + +CombineFun <- function(d,s){ + full_join(d,s, by=c("name_adm1", 'crop')) +} + +CleanFun <- function(l){ +l %>% + # dplyr::rename(value = V1) #%>% +dplyr::select(iso3, name_adm1, + alloc_key, x, y, rec_type, tech_type, + unit, whea:rest) +} + +# TransformFun_yld <- function(d){ + p <- ChangeNames(d) + # l <- GatherFun(p) + # r <- AggregateFun_mean(l) + # y <- CollapseFun(l) + # u <- CombineFun(r,y) + j <- CleanFun(u) +# } + +TransformFun_area <- function(d){ +p <- ChangeNames(d) +# l <- GatherFun(p) +# r <- AggregateFun_sum(p) +# y <- CollapseFun(l) +# u <- CombineFun(r,y) +j <- CleanFun(p) +} + + + +# rm(dat_TS_yld) + +# Applying transformation to all yld_tech_types +TA_yld <- TransformFun_area(dat_TA_yld) +TH_yld <- TransformFun_area(dat_TH_yld) +TI_yld <- TransformFun_area(dat_TI_yld) +TL_yld <- TransformFun_area(dat_TL_yld) +TR_yld <- TransformFun_area(dat_TR_yld) +TS_yld <- TransformFun_area(dat_TS_yld) +lst.yld <- list(TA_yld, TH_yld, TI_yld, TL_yld, TR_yld, TS_yld) +dat_yld <- do.call("rbind", lst.yld) # merging all tech types +write.csv(dat_yld, 'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/Yield_full_cropspread.csv', row.names=T) + +# Applying transformation to production 
+TA_prd <- TransformFun_area(dat_TA_pr) # production tables were loaded above as dat_*_pr
+TH_prd <- TransformFun_area(dat_TH_pr)
+TI_prd <- TransformFun_area(dat_TI_pr)
+TL_prd <- TransformFun_area(dat_TL_pr)
+TR_prd <- TransformFun_area(dat_TR_pr)
+TS_prd <- TransformFun_area(dat_TS_pr)
+lst.prd <- list(TA_prd, TH_prd, TI_prd, TL_prd, TR_prd, TS_prd)
+dat_prd <- do.call("rbind", lst.prd) # merging all tech types
+write.csv(dat_prd, 'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/Production_full_cropspread.csv', row.names=T)
+
+# Applying transformation to physical area tech types
+TA_phy <- TransformFun_area(dat_TA_pa)
+TH_phy <- TransformFun_area(dat_TH_pa)
+TI_phy <- TransformFun_area(dat_TI_pa)
+TL_phy <- TransformFun_area(dat_TL_pa)
+TR_phy <- TransformFun_area(dat_TR_pa)
+TS_phy <- TransformFun_area(dat_TS_pa)
+lst.phy <- list(TA_phy, TH_phy, TI_phy, TL_phy, TR_phy, TS_phy)
+dat_phy <- do.call("rbind", lst.phy) # merging all tech types
+write.csv(dat_phy, 'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/Phys_area_full_cropspread.csv', row.names=T)
+
+# Applying transformation to harvested area tech types
+TA_har <- TransformFun_area(dat_TA_ha)
+TH_har <- TransformFun_area(dat_TH_ha)
+TI_har <- TransformFun_area(dat_TI_ha)
+TL_har <- TransformFun_area(dat_TL_ha)
+TR_har <- TransformFun_area(dat_TR_ha)
+TS_har <- TransformFun_area(dat_TS_ha)
+lst.har <- list(TA_har, TH_har, TI_har, TL_har, TR_har, TS_har)
+dat_har <- do.call("rbind", lst.har) # merging all tech types
+write.csv(dat_har, 'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/Harv_area_spatial_spread_SPAM.csv', row.names=T)
+
+# Independent -- Merging treenuts with SPAM -------------------------------
+# LOAD data from previous step (due to memory overuse)
+setwd(here('Input_data', "Rectypes_full_cropspread"))
+yld_dat <- read_csv('Yield_full_cropspread.csv')
+prd_dat <- read_csv('Production_full_cropspread.csv')
+har_dat <- read_csv('Harv_area_spatial_spread_SPAM.csv')
+phy_dat <- read_csv('Phys_area_full_cropspread.csv')
+
+
+# TREENUTS WITH GADM ------------------------------------------------------
+# # Load and check shapefile
+# wd=setwd(here("Input_data"))
+# sph.prod = read_sf(dsn = wd, layer = "GADM_treenut_rest_pd")
+# sph.harv = read_sf(dsn = wd, layer = "GADM_treenut_rest_ha")
+# sph.all = st_join(sph.prod, sph.harv, join = st_within)
+# # write_sf(sph.all, dsn = here("Input_data", driver = "ESRI shapefile"))
+# sph.prod.tib = as_tibble(sph.prod)
+# sph.prod.tib = dplyr::select(sph.prod.tib, UID, GID_0, GID_1, trnt_r_)
+#
+# sph.harv.tib = as_tibble(sph.harv)
+# sph.harv.tib = dplyr::select(sph.harv.tib, UID, GID_0, GID_1, trnt_r_)
+#
+# # loading SPAM-GADM join
+# spam.gadm.link = read_sf(dsn = here("Input_data", "GADMtoSPAM", "GADM_to_SPAM_within"),
+#                          layer = "GADM_to_SPAM_within")
+# #
+# # Treenuts GADM pixel ---------------------------------------------------------------
+# # CAUTION: THESE FILES ARE WRONG!
THE RATIO IS WRONGLY CALULATED (not grouped by adm1 and adm0 level) +# # Treenuts are loaded from the treenuts script +# # Production +# # rbenchmark::benchmark( +# spam.gadm.link %>% +# as_tibble() %>% +# left_join(.,sph.prod.tib, by="UID") %>% +# left_join(.,prd_dat, by="alloc_key") %>% +# mutate(tnuts = trnt_r_*rest) %>% +# mutate(rest.1 = (1-trnt_r_)*rest) %>% +# dplyr::select(alloc_key, UID, GID_0.x, GID_1.x, rec_type, tech_type, unit, +# whea:rest, tnuts, rest.1) %>% +# write.csv(here("Input_data", "prod_tnuts_spam_gadm.csv")) +# +# # +# # c=read_csv(here("Input_data", "prod_tnuts_spam_gadm.csv")) +# # +# # Approach validation > do the numbers make sense? +# # xy=dat_join_prod %>% +# # group_by(tnuts) %>% +# # top_n(n=10) %>% +# # dplyr::select(rest, tnuts, rest.1) +# +# # write_csv(dat_join_prod, here("Input_data", "dat_treenut_prod.csv")) +# # st_as_sf() +# +# # Harvested area +# # rbenchmark::benchmark( +# spam.gadm.link %>% +# as_tibble() %>% +# left_join(., har_dat, by="alloc_key") %>% +# left_join(.,sph.harv.tib, by="UID") %>% +# mutate(tnuts = trnt_r_*rest) %>% +# mutate(rest.1 = (1-trnt_r_)*rest) %>% +# dplyr::select(alloc_key, UID, GID_0.x, GID_1.x, rec_type, tech_type, unit, +# whea:rest, tnuts, rest.1) %>% +# write.csv(here("Input_data", "harv_tnuts_spam_gadm")) +# # ) +# +# # Physical land +# # rbenchmark::benchmark( +# spam.gadm.link %>% +# as_tibble() %>% +# left_join(., phy_dat, by="alloc_key") %>% +# left_join(.,sph.harv.tib, by="UID") %>% +# mutate(tnuts = trnt_r_*rest) %>% +# mutate(rest.1 = (1-trnt_r_)*rest) %>% +# dplyr::select(alloc_key, UID, GID_0.x, GID_1.x, rec_type, tech_type, unit, +# whea:rest, tnuts, rest.1) %>% +# write.csv(here("Input_data", "phys_tnuts_spam_gadm")) +# +# # Approach: Aim is to get dinal datasets on pixel level > Adm1 level +# 1_Cbind the three datasets +# 2_Gather crops +# 3_Spread levels rec type +# 4_Calculate yields after unit transformation + +# # 1. Load data +# # Global +# dat.phys = read_csv(here("Input_data", "phys_tnuts_spam_gadm.csv")) +# dat.prod = read_csv(here("Input_data", "prod_tnuts_spam_gadm,csv")) +# dat.harv = read_csv(here("Input_data", "harv_tnuts_spam_gadm.csv")) +# +# +# # lst.tnuts = list(dat.harv, dat.phys, dat.prod) +# dat_nutsall <- do.call("rbind", list(dat.harv, dat.phys, dat.prod)) +# +# # GLobal - Issue: Do this with the new laptop! +# # Principle of this function is to not generate any variable as this can lead to overuse of RAM. +# do.call("rbind", list(read_csv(here("Input_data", "phys_tnuts_spam_gadm")), +# read_csv(here("Input_data", "prod_tnuts_spam_gadm")), +# read_csv(here("Input_data", "harv_tnuts_spam_gadm")))) %>% +# dplyr::select(-c(rest, X1)) %>% +# pivot_longer(names_to = "crops", values_to = "value", cols = whea:rest.1) %>% +# pivot_wider(-unit, names_from = "rec_type", +# values_from = "value" ) %>% +# mutate(Y = (P*1000/H))%>% #checked for result. 
We are transforming mt to tons when multiplying Producction by 1000 +# mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% +# rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% +# write.csv(here("Input_data", "pxl_rec_tec_gadm_spam_tnuts")) + +# # SUBSETTING FOR EU-27 GADM ---------------------------------------------------------------------- +# EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA", +# "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR") +# write.csv(EU_iso3, here("Input_data", "EU27_iso3_lst.csv")) +# length(EU_iso3) #27 member states > the list comes from gams +# +# # Check if the EU countries iso3 are all present in dat.phys etc. +# EU_iso3 %in% dat.phys$GID_0.x #Validation +# dat.phys.EU = read_csv(here("Input_data", "phys_tnuts_spam_gadm")) +# filter(dat.phys, GID_0.x %in% EU_iso3) +# +# dat.prod.EU = read_csv(here("Input_data", "prod_tnuts_spam_gadm")) %>% +# filter(dat.prod, GID_0.x %in% EU_iso3) +# +# dat.harv.EU = read_csv(here("Input_data", "harv_tnuts_spam_gadm")) %>% +# dat.harv.EU = filter(dat.harv, GID_0.x %in% EU_iso3) +# +# +# do.call("rbind", list(read_csv(here("Input_data", "phys_tnuts_spam_gadm")), +# read_csv(here("Input_data", "prod_tnuts_spam_gadm")), +# read_csv(here("Input_data", "harv_tnuts_spam_gadm")))) %>% +# filter(GID_0.x %in% EU_iso3) %>% +# dplyr::select(-c(rest, X1)) %>% +# pivot_longer(names_to = "crops", values_to = "value", cols = whea:rest.1) %>% +# pivot_wider(-unit, names_from = "rec_type", +# values_from = "value" ) %>% +# mutate(Y = (P*1000/H))%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here =300kg/ha) convert mt to kg Issue: Weird as according to the EU stats page aMegatonne, abbreviated as Mt, is a metric unit equivalent to 1 million (106) tonnes, or 1 billion (109) kilograms. +# mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% +# rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% +# write.csv(here("Input_data", "pxl_rec_tec_gadm_spam_tnuts_EU27")) +# +# # Testing the approach +# { +# # cbind dfs - testing subset +# fil.prod = dat.prod %>% top_n(n=20) +# fil.harv = dat.harv %>% top_n(n=20) +# fil.phys = dat.phys %>% top_n(n=20) +# +# lst.tnuts = list(fil.harv, fil.phys, fil.prod) +# do.call("rbind", list(fil.harv, fil.phys, fil.prod)) +# +# long.dat.nuts <- do.call("rbind", list(fil.harv, fil.phys, fil.prod)) %>% +# dplyr::select(-c(X1)) %>% +# pivot_longer(names_to = "crops", values_to = "value", cols = whea:tnuts) %>% +# pivot_wider(-unit, names_from = "rec_type", +# values_from = "value" ) %>% +# mutate(Y = (P*1000/H))%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here =300kg/ha) convert mt to kg Issue: Weird as according to the EU stats page aMegatonne, abbreviated as Mt, is a metric unit equivalent to 1 million (106) tonnes, or 1 billion (109) kilograms. 
+#     mutate_at(vars(Y), ~replace(., is.nan(.), 0)) # %>%
+#     # rename(GID_0 = GID_0.x, GID_1 = GID_1.x) %>%
+#     # write.csv(here("Input_data", "test_rbind_fil"))
+# }
+
+
+# SUBSETTING FOR EU-27 GAUL ----------------------------------------------------------------------
+EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA",
+            "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR")
+# write.csv(EU_iso3, here("Input_data", "EU27_iso3_lst.csv"))
+length(EU_iso3) #27 member states > the list comes from gams
+
+# Check if all EU27 iso3 codes are present in the dfs below
+# EU_iso3 %in% dat.phys$iso3 #Validation
+dat.phys.EU = read_csv(here("Input_data", "phys_tnuts_rest_spam_gaul_FINAL.csv")) %>% # Pixel level data
+  filter(iso3 %in% EU_iso3) %>%
+  mutate(alloc_key = str_sub(alloc_key, start=2)) %>%
+  dplyr::select(-c(X1_1, X1))
+
+dat.prod.EU = read_csv(here("Input_data", "prod_tnuts_rest_spam_gaul_FINAL1.csv")) %>%
+  filter(iso3 %in% EU_iso3) %>%
+  mutate(alloc_key = str_sub(alloc_key, start=2)) %>%
+  dplyr::select(-c(X1))
+
+dat.harv.EU = read_csv(here("Input_data", "harv_tnuts_rest_spam_gaul_FINAL.csv")) %>%
+  filter(iso3 %in% EU_iso3) %>%
+  mutate(alloc_key = str_sub(alloc_key, start=2)) %>%
+  dplyr::select(-c(X1_1, X1))
+
+dat.all.EU = do.call("rbind", list(dat.phys.EU, dat.prod.EU, dat.harv.EU)) %>%
+  # filter(iso3 %in% EU_iso3) %>%
+  # dplyr::select(-c(X1, X1_1)) %>%
+  pivot_longer(names_to = "crops", values_to = "value", cols = whea:tnuts) %>%
+  pivot_wider(-unit, names_from = "rec_type",
+              values_from = "value") %>%
+  mutate(Y = (P*1000/H)) %>% # P is in metric tonnes (SPAM's "mt" = metric tonne, not Eurostat's megatonne), so *1000 gives kg and Y is kg/ha. Checked against the original SPAM yield sheet (285 kg/ha there vs. 300 kg/ha here).
+  mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>%
+  # rename(GID_0 = GID_0.x, GID_1 = GID_1.x) %>%
+  write_csv(here("Input_data", "pxl_rec_tec_gaul_spam_tnuts_EU_27_corr_prod.csv")) # IMPORTANT dataset. Pixel level. Tnuts.
+dat.all.EU = read_csv(here("Input_data", "pxl_rec_tec_gaul_spam_tnuts_EU_27_corr_prod.csv"))
+
+# Testing the approach
+# {
+#   # rbind dfs - testing subset
+#   fil.prod = dat.prod %>% top_n(n=20)
+#   fil.harv = dat.harv %>% top_n(n=20)
+#   fil.phys = dat.phys %>% top_n(n=20)
+#
+#   lst.tnuts = list(fil.harv, fil.phys, fil.prod)
+#   x = do.call("rbind", list(fil.harv, fil.phys, fil.prod))
+#
+#   long.dat.nuts <- do.call("rbind", list(fil.harv, fil.phys, fil.prod)) %>%
+#     dplyr::select(-c(X1)) %>%
+#     pivot_longer(names_to = "crops", values_to = "value", cols = whea:rest) %>%
+#     pivot_wider(-unit, names_from = "rec_type",
+#                 values_from = "value") # %>%
+#     mutate(Y = (P*1000/H)) %>% # metric tonnes * 1000 = kg, so Y is kg/ha (285 kg/ha in the SPAM yield sheet vs. 300 kg/ha here)
+ # mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% + # rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% + # write.csv(here("Input_data", "test_rbind_fil")) + # } + +# Crop name mapping ------------------------------------------------------- +# SPAM mapping +nam_cat <- read_csv(here("Input_data", 'crop_names.csv')) # Loading crop name sheets - mapping SPAM -- FAO (FROM SPAM) +nam_cat <- nam_cat %>% mutate(FAOCODE = as.double(FAOCODE)) %>% + rename(No_spam = names(nam_cat)[1]) + +GAEZ_SPAM_crp = read_csv(here("Input_data", "6-GAEZ-and-SPAM-crops-correspondence-2015-02-26.csv")) #Mapping GAEZ and SPAM (from SPAM) +GAEZ_SPAM_crp = GAEZ_SPAM_crp %>% + rename(No_spam = names(.)[4]) %>% + dplyr::select('SPAM crop long name', 'GAEZ crop', No_spam) + +# ORiginal Croplist Modeldat CIFOS +FAO_CIFOS_crp = read_csv(here("Input_data", "FAO_modeldat_2020.csv")) + +# FAO spreaddsheets +FAO_stat_control = read_csv(here("Input_data","FAOSTAT_data_12-26-2020-1.csv")) +food_cat <- read_csv(here("Input_data",'foodnonfood.csv'))# laoding food usage sheet +food_cat = food_cat %>% + rename(`SPAM short name`=Crop_short) +FAOcntr <- read_csv(here("Input_data",'FAO_countries.csv')) # loading FAO country codes +FAOcntr <- dplyr::select(FAOcntr, 'ISO3', 'FAOSTAT_CODE', "GAUL_CODE", 'Continent', 'Subcontinent') + + +# The incomplete mapping of SPAM, GAEZ, FAOstat, FAO_Cifos are made +# Not all the crops are matched perfectly. This is made by hand in excel (export-import a chunk below) +model_dat = left_join(FAO_CIFOS_crp, FAO_stat_control, by="crop") %>% + left_join(nam_cat, ., by="FAOCODE") %>% + left_join(.,GAEZ_SPAM_crp, by= "No_spam") #%>% + write.csv(here("Input_data", "model_crops_incomplete.csv")) + +# This data frame is a complete mapping of GAEZ, SPAM and FAO (FAO original and CIFOS model dat FAO crop names) +# The FAO crop names from the model data differ slightly between Model dat and the fAO crops that I exctracted from FAOSTAT. +# Crop others were adjusted according to SPAM indication and the GAEZ mapped crops (cereal others = oats, etc.) + +dat_crp_map <- read_csv(here("Input_data","model_crops_mapping_GAEZ_SPAM_FAO_complete.csv")) +# dat_crp_map2 <- read_csv(here("Input_data", "model_crops_incomplete.csv")) + +#Validation > Result = Complete match! 
+# dat_crp_ma p2$ crop_cifos %in% FAO_CIFOS_crp$crop + +dat_crp_map1 = dat_crp_map %>% + rename(crop_cifos = crop) %>% + left_join(.,food_cat) #adding food categories (food/non-food) + +dat_crp_map1$Usage[42]="food" #adding the food category to treenuts +write.csv(dat_crp_map1, here("Input_data", "crop_name_mapping_complete.csv")) # this file can be used for further mapping of the crops + +# Aggregating to ADM1 level EU ----------------------------------------------- +# Load and transform data +dat.pxl.EU = read_csv(here("Input_data", "pxl_rec_tec_gaul_spam_tnuts_EU_27_corr_prod.csv")) +dat_TA_yld = read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv")) %>% + dplyr::select(alloc_key,name_cntr,name_adm1, iso3, prod_level, x, y) %>% + mutate(alloc_key = str_sub(alloc_key, 2)) + + +# Mapping countries +FAOcntr <- read_csv(here("Input_data", 'FAO_countries.csv')) +FAOcntr <- dplyr::select(FAOcntr, 'ISO3', 'FAOSTAT_CODE', "GAUL_CODE", 'Continent', 'Subcontinent') %>% + rename(iso3=ISO3) +dat_EU27 = read_csv(here("Input_data", "Country_name_mapping_incomplete.csv"))%>% + left_join(.,FAOcntr, by="iso3") + +dat_crop_nam = read_csv(here("Input_data", "crop_name_mapping_complete.csv")) %>% + dplyr::select(-c(X1,X1_1)) %>% + rename(crops = 'SPAM short name') + +# Transform pixel data so that it has SPAM variables (adm1 level) + +dat.pixel.EU = dat.pxl.EU %>% + mutate_at(vars(alloc_key), funs(as.character)) %>% + left_join(.,dat_TA_yld, by="alloc_key") %>% + dplyr::select(alloc_key, iso3.x, name_cntr.x,name_adm1.x, prod_level, iso3.x, G2008_1_ID, + ADM0_CODE, ADM0_NAME, ADM1_CODE, ADM1_NAME, x, y, crops, tech_type) %>% + rename(iso3=iso3.x, + name_cntr=name_cntr.x, + name_adm1=name_adm1.x) %>% + left_join(.,dat_crop_nam, by="crops") %>% + left_join(.,dat_EU27, by="iso3") %>% + dplyr::select(prod_level, G2008_1_ID, tech_type, CIFOS_cntr_nam, crop_cifos, crops) + +# df with prod_level and Cifos country EU +EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA", + "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR") + +dat_cntr_EU = dat.pxl.EU %>% + mutate_at(vars(alloc_key), funs(as.character)) %>% + left_join(.,dat_TA_yld, by="alloc_key") %>% + dplyr::select(prod_level, iso3.x, name_adm1.x) %>% + dplyr::distinct_at(vars('prod_level', 'iso3.x', "name_adm1.x"), .keep_all = F) %>% + rename(iso3=iso3.x, + name_adm1=name_adm1.x) %>% + filter(iso3 %in% EU_iso3) %>% + left_join(.,dat_EU27, by="iso3") %>% + dplyr::select(prod_level, iso3, name_adm1, CIFOS_cntr_nam, Continent, Subcontinent)# %>% + # rename( + # iso3=iso3.x, + # name_adm1=name_adm1.x) + +# df with crops and cifos crops +dat_crop_nam1 = read_csv(here("Input_data", "crop_name_mapping_complete.csv")) %>% + dplyr::select('SPAM short name', FAONAMES, crop_cifos) %>% + rename(crops = 'SPAM short name') + +# Aggregating H,A,P and calculating Yields per Adm1 level +dat.adm1.EU = dat.pxl.EU %>% + mutate_at(vars(alloc_key), funs(as.character)) %>% +# left_join(., dat.pxl.EU, by="ADM1_NAME") %>% +# mutate_at(vars(alloc_key), funs(as.character)) %>% + left_join(.,dat_TA_yld, by="alloc_key") %>% + dplyr::select(iso3.x, name_cntr.x, name_adm1.x, ADM0_NAME, ADM1_NAME, prod_level, tech_type, crops, A, H, P) %>% + group_by(prod_level, tech_type, crops) %>% + summarize(across(.cols = A:P, .fns = sum))%>% #all three rec types are summed per adm1(prod_level) and crop + mutate(Y = (P*1000/H))%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here 
= 300 kg/ha). P is in metric tonnes ("mt" = metric tonne, not Eurostat's megatonne), so *1000 converts t to kg and Y is kg/ha.
+  mutate_at(vars(Y), ~replace(., is.nan(.), 0))
+
+# This is the sheet that contains the parameters that are important for the GAMS input file.
+# It contains yield, harvested and physical area per country, adm1 unit and crop.
+EU27_crp_adm1 = dat.adm1.EU %>%
+  left_join(., dat_cntr_EU, by="prod_level") %>%
+  filter(iso3 %in% EU_iso3) %>%
+  left_join(., dat_crop_nam1, by="crops") %>%
+  dplyr::select(iso3, FAONAMES, Subcontinent, Continent, CIFOS_cntr_nam, name_adm1, crop_cifos, A, H, P, Y) %>%
+  write_csv(here("Input_data", "EU27_crp_adm1_gaul.csv")) # Final crop yield/area file.
+
+# Final dataset!!
+EU27_crp_adm1 <- read_csv(here("Input_data", "EU27_crp_adm1_gaul.csv"))
+View(EU27_crp_adm1)
+
+length(unique(EU27_crp_adm1$iso3)) # Correct! 27 countries present.
+anyNA(EU27_crp_adm1) # check for missing values
+# Continue: Why is ALB in the final dataset? Check where this went wrong.
+# Validate yields: sum the adm1 values by hand and check if they match.
+# Check whether this makes sense > 319652 rows?!
+
+# Then add the GADM join again and get values for the SOC% >> add the peat/non-peat layer to it.
+# Add usage to the sheet (or a single sheet)
+
+# Bringing back some SPAM geo-variables to compare them with GADM
+dat.agg.EU = dat_TA_yld %>%
+  dplyr::select(iso3, prod_level, alloc_key, x, y, name_cntr, name_adm1) %>%
+  right_join(., dat.pxl.EU, by="alloc_key")
+
+dat.agg.EU %>% dplyr::select(iso3)
+# Exploratory (incomplete) aggregation, kept for reference:
+# dat.pxl.EU %>%
+#   # dplyr::select(-Y) %>%
+#   group_by(GID_1) %>%
+#   summarize()
+
+max(dat.pxl.EU$Y)
+
+v <- validator() # empty rule set so far; add rules to check dat.pxl.EU
+summary(v)
+cf <- confront(dat.pxl.EU, v)
+summary(cf)
+?confront
+
+# creating a tidy table with all tech variables in one df
+list.comb <- list(yld_dat, prd_dat, har_dat, phy_dat) # production table was loaded above as prd_dat
+# list.comb2 = list(dat_har, dat_phy, dat_yld, da) # 'da' is undefined/truncated; left commented out
+dat.comb <- do.call("rbind", list.comb)
+# write.csv(dat.comb, 'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/dat_comb_tidy.csv', row.names=T)
+
+# loading crop usage sheet --------------------------------------------------------------
+setwd(here("Input_data"))
+food_cat <- read_csv('foodnonfood.csv')
+# Loading crop name sheets
+nam_cat <- read_csv('crop_names.csv')
+# loading FAO country codes
+FAOcntr <- read_csv(here("Input_data", 'FAO_countries.csv'))
+FAOcntr <- dplyr::select(FAOcntr, 'ISO3', 'FAOSTAT_CODE', "GAUL_CODE", 'Continent', 'Subcontinent')
+
+# joining all together
+names(dat.comb)[8] <- "Crop_short"
+names(nam_cat)[2] <- "Crop_short"
+names(dat.comb)[2] <- "ISO3"
+
+join.use <- left_join(dat.comb, food_cat, by="Crop_short")
+join_FAOcntr <- left_join(join.use, FAOcntr, by="ISO3")
+SPAM_dat_tidy <- left_join(join_FAOcntr, nam_cat, by='Crop_short')
+
+AggregateFun_mean <- function(z){ # for yield; ddply() is from plyr, which is not in the package list above
+  plyr::ddply(z, c("name_adm1", "crop"),
+              function(z) mean(z$valu[z$valu != 0]))
+}
+
+AggregateFun_sum <- function(g){ # for area
+  plyr::ddply(g, c("name_adm1", "crop"),
+              function(g) sum(g$valu))
+}
+
+CollapseFun <- function(h){
+  h %>% dplyr::distinct_at(vars('name_adm1', 'crop'), .keep_all = TRUE)
+}
+
+CombineFun <- function(d,s){
+  full_join(d, s, by=c("name_adm1", 'crop'))
+}
+
+# rename and reorder dataset columns
+Clean2Fun <- function(x){
+  i <- x %>% dplyr::rename(SPAMcrop_short = Crop_short,
+                           SPAMcrop_long = `SPAM long name`,
+                           use = Usage) %>%
+    dplyr::select(-c(`No. crt.`, ID)) %>%
+    arrange(name_adm1, SPAMcrop_short, rec_type, tech_type)
+  c <- i[c('ISO3', 'name_cntr', 'name_adm1', 'alloc_key', 'FAOSTAT_CODE', 'GAUL_CODE', 'SPAMcrop_short', 'Continent', 'Subcontinent', 'SPAMcrop_long', 'FAONAMES', 'FAOCODE', 'GROUP', 'use', 'rec_type', 'tech_type', 'unit', 'valu')]
+}
+SPAM_dat_tidy <- Clean2Fun(SPAM_dat_tidy)
+
+# Save file - Option 1: long dataset
+write.csv(SPAM_dat_tidy, here("Input_data", "Yld_tidy_all.csv"), row.names=T)
+
+# Option 2 - Harvested and physical area horizontal
+SprdFun <- function(x){
+  t <- x %>% dplyr::select(-c(unit)) %>%
+    spread(rec_type, valu) %>%
+    dplyr::rename(yield_kgha = Y,
+                  phys_area_ha = A,
+                  harv_area_ha = H)
+  c <- t[c('Continent', 'Subcontinent', 'ISO3', 'name_cntr', 'name_adm1', 'alloc_key', 'FAOSTAT_CODE', 'GAUL_CODE', 'SPAMcrop_short', 'SPAMcrop_long', 'FAONAMES', 'FAOCODE', 'GROUP', 'use', 'tech_type', 'phys_area_ha', 'harv_area_ha', 'yield_kgha')]
+}
+
+sprd_dat <- SprdFun(SPAM_dat_tidy)
+write.csv(sprd_dat, here("Input_data", "SPAMspread_all_fixd.csv"), row.names=T)
+
+
diff --git a/SPAM_Data_Transformation/SPAM_no_agg_spatial_2.R b/SPAM_Data_Transformation/SPAM_no_agg_spatial_2.R
new file mode 100644
index 0000000000000000000000000000000000000000..d8dae4c1f94cbe496141b0fbf46cdf601ecc3770
--- /dev/null
+++ b/SPAM_Data_Transformation/SPAM_no_agg_spatial_2.R
@@ -0,0 +1,782 @@
+easypackages::packages(c("tidyverse", 'sf', 'raster','sp','dplyr','spatialEco',
+                         'exactextractr','tibble', 'here', "readr",
+                         "readxl", "foreign", "data.table", "rbenchmark", "benchmarkme",
+                         "validate"))
+setwd(here("Input_data"))
+# here::here()
+## Check the current memory limit
+memory.limit()
+
+## Increase the memory limit if needed
+# memory.limit(size=20000)
+gc()
+# Load data ----------------------------------------------------------------
+# Yields ------------------------------------------------------------------
+dat_TA_yld <- read_csv("spam2010V2r0_global_Y_TA.csv") # Yields - all technologies together, i.e. complete crop - Data downloaded on 1.
of September 2020 +dat_TI_yld <- read_csv("spam2010V2r0_global_Y_TI.csv")# Yields - Iirrigated portion of crop +dat_TH_yld <- read_csv("spam2010V2r0_global_Y_TH.csv")# Yields - rainfed high inputs portion of crop +dat_TL_yld <- read_csv("spam2010V2r0_global_Y_TL.csv")# Yields - rainfed low inputs portion of crop +dat_TS_yld <- read_csv("spam2010V2r0_global_Y_TS.csv")# Yields - rainfed subsistence portion of crop +dat_TR_yld <- read_csv("spam2010V2r0_global_Y_TR.csv")# Yields - rainfed portion of crop (= TA - TI, or TH + TL + TS) + + +# Production +dat_TA_pr <- read_csv("spam2010V2r0_global_P_TA.csv") +dat_TI_pr <- read_csv("spam2010V2r0_global_P_TI.csv") +dat_TH_pr <- read_csv("spam2010V2r0_global_P_TH.csv") +dat_TL_pr <- read_csv("spam2010V2r0_global_P_TL.csv") +dat_TS_pr <- read_csv("spam2010V2r0_global_P_TS.csv") +dat_TR_pr <- read_csv("spam2010V2r0_global_P_TR.csv") + +# Physical area-------------------------------- +dat_TA_pa <- read_csv("spam2010V2r0_global_A_TA.csv") +dat_TI_pa <- read_csv("spam2010V2r0_global_A_TI.csv") +dat_TH_pa <- read_csv("spam2010V2r0_global_A_TH.csv") +dat_TL_pa <- read_csv("spam2010V2r0_global_A_TL.csv") +dat_TS_pa <- read_csv("spam2010V2r0_global_A_TS.csv") +dat_TR_pa <- read_csv("spam2010V2r0_global_A_TR.csv") + +# Harvesting area---------------------------------- +dat_TA_ha <- read_csv("spam2010V2r0_global_H_TA.csv") +dat_TI_ha <- read_csv("spam2010V2r0_global_H_TI.csv") +dat_TH_ha <- read_csv("spam2010V2r0_global_H_TH.csv") +dat_TL_ha <- read_csv("spam2010V2r0_global_H_TL.csv") +dat_TS_ha <- read_csv("spam2010V2r0_global_H_TS.csv") +dat_TR_ha <- read_csv("spam2010V2r0_global_H_TR.csv") + +# Functions - Yld --------------------------------------------------------------- +ChangeNames <- function(x) { ## Renaming function + crp <- c('whea', 'rice', 'maiz', 'barl', 'pmil', 'smil', 'sorg', 'ocer', 'pota', 'swpo', 'yams', 'cass', 'orts', 'bean', 'chic', 'cowp', 'pige', 'lent', 'opul', 'soyb', + 'grou', 'cnut', 'oilp', 'sunf', 'rape', 'sesa', 'ooil', 'sugc', 'sugb', 'cott', 'ofib', 'acof', 'rcof', 'coco', 'teas', 'toba', 'bana', 'plnt', 'trof', 'temf', 'vege', 'rest') + names(x)[10:51] <- crp + return(x) +} +GatherFun <- function(k){ + k %>% gather('crop', 'valu', whea:rest) +} +# ------------------------------------------- +# If no aggregartion +AggregateFun_mean <- function(z){ #for yield + ddply(z, .(name_adm1, crop), + function(z) mean(z$valu[z$valu!=0])) +} + +AggregateFun_sum <- function(g){ #for area + ddply(g, .(name_adm1, crop), + function(g) sum(g$valu)) +} + +CollapseFun <- function(h){ + h %>% dplyr::distinct_at(vars('name_adm1', 'crop'), .keep_all = TRUE) +} + +CombineFun <- function(d,s){ + full_join(d,s, by=c("name_adm1", 'crop')) +} +# ------------------------- +CleanFun <- function(l){ + l %>% + # dplyr::rename( + # # value = V1, + # cntr = iso3) %>% + dplyr::select(iso3, name_cntr, name_adm1, name_adm2, alloc_key, x, y, rec_type, tech_type, unit, whea:rest) +} + +TransformFun_yld <- function(d){ + p <- ChangeNames(d) + # l <- GatherFun(p) + # r <- AggregateFun_mean(l) + # y <- CollapseFun(l) + # u <- CombineFun(r,y) + j <- CleanFun(p) +} + +TransformFun_area <- function(d){ + p <- ChangeNames(d) + # l <- GatherFun(p) + # r <- AggregateFun_sum(l) + # y <- CollapseFun(l) + # u <- CombineFun(r,y) + j <- CleanFun(p) +} + + +# With aggregation long ----------------------------------------------------- +ChangeNames <- function(x) { ## Renaming function + crp <- c('whea', 'rice', 'maiz', 'barl', 'pmil', 'smil', 'sorg', 'ocer', 'pota', 'swpo', 'yams', 
'cass', 'orts', 'bean', 'chic', 'cowp', 'pige', 'lent', 'opul', 'soyb', + 'grou', 'cnut', 'oilp', 'sunf', 'rape', 'sesa', 'ooil', 'sugc', 'sugb', 'cott', 'ofib', 'acof', 'rcof', 'coco', 'teas', 'toba', 'bana', 'plnt', 'trof', 'temf', 'vege', 'rest') + names(x)[10:51] <- crp + return(x) +} +GatherFun <- function(k){ + k %>% gather('crop', 'valu', whea:rest) +} + +AggregateFun_mean <- function(z){ #for yield + ddply(z, .(name_adm1, crop), + function(z) mean(z$valu[z$valu!=0])) +} + +AggregateFun_sum <- function(g){ #for area + ddply(g, .(name_adm1, crop), + function(g) sum(g$valu)) +} + +CollapseFun <- function(h){ + h %>% dplyr::distinct_at(vars('name_adm1', 'crop'), .keep_all = TRUE) +} + +CombineFun <- function(d,s){ + full_join(d,s, by=c("name_adm1", 'crop')) +} + +# CleanFun <- function(l){ +u %>% + dplyr::rename(value = V1) #%>% +dplyr::select(iso3, name_cntr, name_adm1, name_adm2, + alloc_key, x, y, rec_type, tech_type, + unit, whea:rest) +} + +TransformFun_yld <- function(d){ + p <- ChangeNames(d) + l <- GatherFun(p) + r <- AggregateFun_mean(l) + y <- CollapseFun(l) + u <- CombineFun(r,y) + j <- CleanFun(p) +} + +# TransformFun_area <- function(d){ +p <- ChangeNames(dat_TA_pd) +l <- GatherFun(p) +r <- AggregateFun_sum(p) +y <- CollapseFun(l) +u <- CombineFun(r,y) +j <- CleanFun(p) +} + + +# With aggregation l spread crops ---------------------------------------- + + + +ChangeNames <- function(x) { ## Renaming function + crp <- c('whea', 'rice', 'maiz', 'barl', 'pmil', 'smil', 'sorg', 'ocer', 'pota', 'swpo', 'yams', 'cass', 'orts', 'bean', 'chic', 'cowp', 'pige', 'lent', 'opul', 'soyb', + 'grou', 'cnut', 'oilp', 'sunf', 'rape', 'sesa', 'ooil', 'sugc', 'sugb', 'cott', 'ofib', 'acof', 'rcof', 'coco', 'teas', 'toba', 'bana', 'plnt', 'trof', 'temf', 'vege', 'rest') + names(x)[10:51] <- crp + return(x) +} + +GatherFun <- function(k){ + k %>% gather('crop', 'valu', whea:rest) +} + +# AggregateFun_mean <- function(z){ #for yield +# ddply(z, .(name_adm1, crop), +# function(z) mean(z$valu[z$valu!=0])) +# } + + +# } + +AggregateFun_sum <- function(g){ #for area + ddply(g, .(name_adm1, crop), + function(g) sum(g$valu)) +} + +CollapseFun <- function(h){ + h %>% dplyr::distinct_at(vars('name_adm1', 'crop'), .keep_all = TRUE) +} + +CombineFun <- function(d,s){ + full_join(d,s, by=c("name_adm1", 'crop')) +} + +CleanFun <- function(l){ + l %>% + # dplyr::rename(value = V1) #%>% + dplyr::select(iso3, name_adm1, + alloc_key, x, y, rec_type, tech_type, + unit, whea:rest) +} + +# TransformFun_yld <- function(d){ +p <- ChangeNames(d) +# l <- GatherFun(p) +# r <- AggregateFun_mean(l) +# y <- CollapseFun(l) +# u <- CombineFun(r,y) +j <- CleanFun(u) +# } + +TransformFun_area <- function(d){ + p <- ChangeNames(d) + # l <- GatherFun(p) + # r <- AggregateFun_sum(p) + # y <- CollapseFun(l) + # u <- CombineFun(r,y) + j <- CleanFun(p) +} + + + +# rm(dat_TS_yld) + +# Applying transformation to all yld_tech_types +TA_yld <- TransformFun_area(dat_TA_yld) +TH_yld <- TransformFun_area(dat_TH_yld) +TI_yld <- TransformFun_area(dat_TI_yld) +TL_yld <- TransformFun_area(dat_TL_yld) +TR_yld <- TransformFun_area(dat_TR_yld) +TS_yld <- TransformFun_area(dat_TS_yld) +lst.yld <- list(TA_yld, TH_yld, TI_yld, TL_yld, TR_yld, TS_yld) +dat_yld <- do.call("rbind", lst.yld) # merging all tech types +write.csv(dat_yld, 'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/Yield_full_cropspread.csv', row.names=T) + +# Applying transformation to production +TA_prd <- 
TransformFun_area(dat_TA_pr) # production tables were loaded above as dat_*_pr
+TH_prd <- TransformFun_area(dat_TH_pr)
+TI_prd <- TransformFun_area(dat_TI_pr)
+TL_prd <- TransformFun_area(dat_TL_pr)
+TR_prd <- TransformFun_area(dat_TR_pr)
+TS_prd <- TransformFun_area(dat_TS_pr)
+lst.prd <- list(TA_prd, TH_prd, TI_prd, TL_prd, TR_prd, TS_prd)
+dat_prd <- do.call("rbind", lst.prd) # merging all tech types
+write.csv(dat_prd, 'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/Production_full_cropspread.csv', row.names=T)
+
+# Applying transformation to physical area tech types
+TA_phy <- TransformFun_area(dat_TA_pa)
+TH_phy <- TransformFun_area(dat_TH_pa)
+TI_phy <- TransformFun_area(dat_TI_pa)
+TL_phy <- TransformFun_area(dat_TL_pa)
+TR_phy <- TransformFun_area(dat_TR_pa)
+TS_phy <- TransformFun_area(dat_TS_pa)
+lst.phy <- list(TA_phy, TH_phy, TI_phy, TL_phy, TR_phy, TS_phy)
+dat_phy <- do.call("rbind", lst.phy) # merging all tech types
+write.csv(dat_phy, 'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/Phys_area_full_cropspread.csv', row.names=T)
+
+# Applying transformation to harvested area tech types
+TA_har <- TransformFun_area(dat_TA_ha)
+TH_har <- TransformFun_area(dat_TH_ha)
+TI_har <- TransformFun_area(dat_TI_ha)
+TL_har <- TransformFun_area(dat_TL_ha)
+TR_har <- TransformFun_area(dat_TR_ha)
+TS_har <- TransformFun_area(dat_TS_ha)
+lst.har <- list(TA_har, TH_har, TI_har, TL_har, TR_har, TS_har)
+dat_har <- do.call("rbind", lst.har) # merging all tech types
+write.csv(dat_har, 'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/Harv_area_spatial_spread_SPAM.csv', row.names=T)
+
+# Independent -- Merging treenuts with SPAM -------------------------------
+# LOAD data from previous step (due to memory overuse)
+setwd(here('Input_data', "Rectypes_full_cropspread"))
+yld_dat <- read_csv('Yield_full_cropspread.csv')
+prd_dat <- read_csv('Production_full_cropspread.csv')
+har_dat <- read_csv('Harv_area_spatial_spread_SPAM.csv')
+phy_dat <- read_csv('Phys_area_full_cropspread.csv')
+
+
+# TREENUTS WITH GADM ------------------------------------------------------
+# # Load and check shapefile
+# wd=setwd(here("Input_data"))
+# sph.prod = read_sf(dsn = wd, layer = "GADM_treenut_rest_pd")
+# sph.harv = read_sf(dsn = wd, layer = "GADM_treenut_rest_ha")
+# sph.all = st_join(sph.prod, sph.harv, join = st_within)
+# # write_sf(sph.all, dsn = here("Input_data", driver = "ESRI shapefile"))
+# sph.prod.tib = as_tibble(sph.prod)
+# sph.prod.tib = dplyr::select(sph.prod.tib, UID, GID_0, GID_1, trnt_r_)
+#
+# sph.harv.tib = as_tibble(sph.harv)
+# sph.harv.tib = dplyr::select(sph.harv.tib, UID, GID_0, GID_1, trnt_r_)
+#
+# # loading SPAM-GADM join
+# spam.gadm.link = read_sf(dsn = here("Input_data", "GADMtoSPAM", "GADM_to_SPAM_within"),
+#                          layer = "GADM_to_SPAM_within")
+# #
+# # Treenuts GADM pixel ---------------------------------------------------------------
+# # CAUTION: THESE FILES ARE WRONG!
THE RATIO IS WRONGLY CALULATED (not grouped by adm1 and adm0 level) +# # Treenuts are loaded from the treenuts script +# # Production +# # rbenchmark::benchmark( +# spam.gadm.link %>% +# as_tibble() %>% +# left_join(.,sph.prod.tib, by="UID") %>% +# left_join(.,prd_dat, by="alloc_key") %>% +# mutate(tnuts = trnt_r_*rest) %>% +# mutate(rest.1 = (1-trnt_r_)*rest) %>% +# dplyr::select(alloc_key, UID, GID_0.x, GID_1.x, rec_type, tech_type, unit, +# whea:rest, tnuts, rest.1) %>% +# write.csv(here("Input_data", "prod_tnuts_spam_gadm.csv")) +# +# # +# # c=read_csv(here("Input_data", "prod_tnuts_spam_gadm.csv")) +# # +# # Approach validation > do the numbers make sense? +# # xy=dat_join_prod %>% +# # group_by(tnuts) %>% +# # top_n(n=10) %>% +# # dplyr::select(rest, tnuts, rest.1) +# +# # write_csv(dat_join_prod, here("Input_data", "dat_treenut_prod.csv")) +# # st_as_sf() +# +# # Harvested area +# # rbenchmark::benchmark( +# spam.gadm.link %>% +# as_tibble() %>% +# left_join(., har_dat, by="alloc_key") %>% +# left_join(.,sph.harv.tib, by="UID") %>% +# mutate(tnuts = trnt_r_*rest) %>% +# mutate(rest.1 = (1-trnt_r_)*rest) %>% +# dplyr::select(alloc_key, UID, GID_0.x, GID_1.x, rec_type, tech_type, unit, +# whea:rest, tnuts, rest.1) %>% +# write.csv(here("Input_data", "harv_tnuts_spam_gadm")) +# # ) +# +# # Physical land +# # rbenchmark::benchmark( +# spam.gadm.link %>% +# as_tibble() %>% +# left_join(., phy_dat, by="alloc_key") %>% +# left_join(.,sph.harv.tib, by="UID") %>% +# mutate(tnuts = trnt_r_*rest) %>% +# mutate(rest.1 = (1-trnt_r_)*rest) %>% +# dplyr::select(alloc_key, UID, GID_0.x, GID_1.x, rec_type, tech_type, unit, +# whea:rest, tnuts, rest.1) %>% +# write.csv(here("Input_data", "phys_tnuts_spam_gadm")) +# +# # Approach: Aim is to get dinal datasets on pixel level > Adm1 level +# 1_Cbind the three datasets +# 2_Gather crops +# 3_Spread levels rec type +# 4_Calculate yields after unit transformation + +# # 1. Load data +# # Global +# dat.phys = read_csv(here("Input_data", "phys_tnuts_spam_gadm.csv")) +# dat.prod = read_csv(here("Input_data", "prod_tnuts_spam_gadm,csv")) +# dat.harv = read_csv(here("Input_data", "harv_tnuts_spam_gadm.csv")) +# +# +# # lst.tnuts = list(dat.harv, dat.phys, dat.prod) +# dat_nutsall <- do.call("rbind", list(dat.harv, dat.phys, dat.prod)) +# +# # GLobal - Issue: Do this with the new laptop! +# # Principle of this function is to not generate any variable as this can lead to overuse of RAM. +# do.call("rbind", list(read_csv(here("Input_data", "phys_tnuts_spam_gadm")), +# read_csv(here("Input_data", "prod_tnuts_spam_gadm")), +# read_csv(here("Input_data", "harv_tnuts_spam_gadm")))) %>% +# dplyr::select(-c(rest, X1)) %>% +# pivot_longer(names_to = "crops", values_to = "value", cols = whea:rest.1) %>% +# pivot_wider(-unit, names_from = "rec_type", +# values_from = "value" ) %>% +# mutate(Y = (P*1000/H))%>% #checked for result. 
We are transforming mt to tons when multiplying Producction by 1000 +# mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% +# rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% +# write.csv(here("Input_data", "pxl_rec_tec_gadm_spam_tnuts")) + +# # SUBSETTING FOR EU-27 GADM ---------------------------------------------------------------------- +# EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA", +# "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR") +# write.csv(EU_iso3, here("Input_data", "EU27_iso3_lst.csv")) +# length(EU_iso3) #27 member states > the list comes from gams +# +# # Check if the EU countries iso3 are all present in dat.phys etc. +# EU_iso3 %in% dat.phys$GID_0.x #Validation +# dat.phys.EU = read_csv(here("Input_data", "phys_tnuts_spam_gadm")) +# filter(dat.phys, GID_0.x %in% EU_iso3) +# +# dat.prod.EU = read_csv(here("Input_data", "prod_tnuts_spam_gadm")) %>% +# filter(dat.prod, GID_0.x %in% EU_iso3) +# +# dat.harv.EU = read_csv(here("Input_data", "harv_tnuts_spam_gadm")) %>% +# dat.harv.EU = filter(dat.harv, GID_0.x %in% EU_iso3) +# +# +# do.call("rbind", list(read_csv(here("Input_data", "phys_tnuts_spam_gadm")), +# read_csv(here("Input_data", "prod_tnuts_spam_gadm")), +# read_csv(here("Input_data", "harv_tnuts_spam_gadm")))) %>% +# filter(GID_0.x %in% EU_iso3) %>% +# dplyr::select(-c(rest, X1)) %>% +# pivot_longer(names_to = "crops", values_to = "value", cols = whea:rest.1) %>% +# pivot_wider(-unit, names_from = "rec_type", +# values_from = "value" ) %>% +# mutate(Y = (P*1000/H))%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here =300kg/ha) convert mt to kg Issue: Weird as according to the EU stats page aMegatonne, abbreviated as Mt, is a metric unit equivalent to 1 million (106) tonnes, or 1 billion (109) kilograms. +# mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% +# rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% +# write.csv(here("Input_data", "pxl_rec_tec_gadm_spam_tnuts_EU27")) +# +# # Testing the approach +# { +# # cbind dfs - testing subset +# fil.prod = dat.prod %>% top_n(n=20) +# fil.harv = dat.harv %>% top_n(n=20) +# fil.phys = dat.phys %>% top_n(n=20) +# +# lst.tnuts = list(fil.harv, fil.phys, fil.prod) +# do.call("rbind", list(fil.harv, fil.phys, fil.prod)) +# +# long.dat.nuts <- do.call("rbind", list(fil.harv, fil.phys, fil.prod)) %>% +# dplyr::select(-c(X1)) %>% +# pivot_longer(names_to = "crops", values_to = "value", cols = whea:tnuts) %>% +# pivot_wider(-unit, names_from = "rec_type", +# values_from = "value" ) %>% +# mutate(Y = (P*1000/H))%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here =300kg/ha) convert mt to kg Issue: Weird as according to the EU stats page aMegatonne, abbreviated as Mt, is a metric unit equivalent to 1 million (106) tonnes, or 1 billion (109) kilograms. 
+# mutate_at(vars(Y), ~replace(., is.nan(.), 0)) #%>%
+# # rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>%
+# write.csv(here("Input_data", "test_rbind_fil"))
+# }
+
+# SUBSETTING FOR EU-27 GAUL ----------------------------------------------------------------------
+EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA",
+            "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR")
+# write.csv(EU_iso3, here("Input_data", "EU27_iso3_lst.csv"))
+length(EU_iso3) #27 member states > the list comes from gams
+
+# Check if the EU countries iso3 are all present in dat.phys etc.
+# EU_iso3 %in% dat.phys$iso3 #Validation if all EU27 countries are in the dfs below
+dat.phys.EU = read_csv(here("Input_data", "phys_tnuts_rest_spam_gaul_FINAL.csv")) %>% #Pixel level data
+  filter(iso3 %in% EU_iso3) %>%
+  mutate(alloc_key = str_sub(alloc_key, start=2)) %>%
+  dplyr::select(-c(X1_1, X1))
+
+dat.prod.EU = read_csv(here("Input_data", "prod_tnuts_rest_spam_gaul_FINAL1.csv")) %>%
+  filter(iso3 %in% EU_iso3) %>%
+  mutate(alloc_key = str_sub(alloc_key, start=2)) %>%
+  dplyr::select(-c(X1))
+
+dat.harv.EU = read_csv(here("Input_data", "harv_tnuts_rest_spam_gaul_FINAL.csv")) %>%
+  filter(iso3 %in% EU_iso3) %>%
+  mutate(alloc_key = str_sub(alloc_key, start=2)) %>%
+  dplyr::select(-c(X1_1, X1))
+
+dat.all.EU = do.call("rbind", list(dat.phys.EU, dat.prod.EU, dat.harv.EU)) %>%
+  # filter(iso3 %in% EU_iso3) %>%
+  # dplyr::select(-c(X1, X1_1)) %>%
+  pivot_longer(names_to = "crops", values_to = "value", cols = whea:tnuts) %>%
+  pivot_wider(-unit, names_from = "rec_type",
+              values_from = "value" ) %>%
+  mutate(Y = (P*1000/H)) %>% # P is in metric tonnes ('mt' = tonne, not megatonne); *1000 converts to kg, so Y is kg/ha. Cross-checked against the SPAM yield sheet (285 kg/ha there vs 300 kg/ha here).
+  mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% # pipe must stay unbroken here, otherwise the NaN step and the write are detached from the chain
+  # rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>%
+  write_csv(here("Input_data", "pxl_rec_tec_gaul_spam_tnuts_EU_27_corr_prod.csv")) #IMPORTANT dataset. Pixel level. Tnuts.
+dat.all.EU=read_csv(here("Input_data","pxl_rec_tec_gaul_spam_tnuts_EU_27_corr_prod.csv"))
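+
+# The NaN guard above can be folded into the yield calculation itself; a sketch
+# with the same columns (H == 0 is what produces the NaNs):
+# dat.all.EU <- dat.all.EU %>% mutate(Y = dplyr::if_else(H > 0, P * 1000 / H, 0))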
+
+# Testing the approach
+# {
+#   # cbind dfs - testing subset
+#   fil.prod = dat.prod %>% top_n(n=20)
+#   fil.harv = dat.harv %>% top_n(n=20)
+#   fil.phys = dat.phys %>% top_n(n=20)
+#
+#   lst.tnuts = list(fil.harv, fil.phys, fil.prod)
+#   do.call("rbind", list(fil.harv, fil.phys, fil.prod))
+#
+#   long.dat.nuts <- do.call("rbind", list(fil.harv, fil.phys, fil.prod)) %>%
+#     dplyr::select(-c(X1)) %>%
+#     pivot_longer(names_to = "crops", values_to = "value", cols = whea:tnuts) %>%
+#     pivot_wider(-unit, names_from = "rec_type",
+#                 values_from = "value" ) %>%
+#     mutate(Y = (P*1000/H))%>% # see unit note above (mt = metric tonne)
+#     mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>%
+#     rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>%
+#     write.csv(here("Input_data", "test_rbind_fil"))
+# }
+
+# Crop name mapping -------------------------------------------------------
+# SPAM mapping
+nam_cat <- read_csv(here("Input_data", 'crop_names.csv')) # Loading crop name sheets - mapping SPAM -- FAO (FROM SPAM)
+nam_cat <- nam_cat %>% mutate(FAOCODE = as.double(FAOCODE)) %>%
+  rename(No_spam = names(nam_cat)[1])
+
+GAEZ_SPAM_crp = read_csv(here("Input_data", "6-GAEZ-and-SPAM-crops-correspondence-2015-02-26.csv")) #Mapping GAEZ and SPAM (from SPAM)
+GAEZ_SPAM_crp = GAEZ_SPAM_crp %>%
+  rename(No_spam = names(.)[4]) %>%
+  dplyr::select('SPAM crop long name', 'GAEZ crop', No_spam)
+
+# Original crop list from the CIFOS model data
+FAO_CIFOS_crp = read_csv(here("Input_data", "FAO_modeldat_2020.csv"))
+
+# FAO spreadsheets
+FAO_stat_control = read_csv(here("Input_data", "FAOSTAT_data_12-26-2020-1.csv"))
+food_cat <- read_csv(here("Input_data",'foodnonfood.csv')) # loading food usage sheet
+food_cat = food_cat %>%
+  rename(`SPAM short name`=Crop_short)
+FAOcntr <- read_csv(here("Input_data", 'FAO_countries.csv')) # loading FAO country codes
+FAOcntr <- dplyr::select(FAOcntr, 'ISO3', 'FAOSTAT_CODE', "GAUL_CODE", 'Continent', 'Subcontinent')
+
+# The incomplete mapping of SPAM, GAEZ, FAOstat and FAO_Cifos is built here.
+# Not all the crops match perfectly; the remainder is fixed by hand in excel (export-import a chunk below)
+model_dat = left_join(FAO_CIFOS_crp, FAO_stat_control, by="crop") %>%
+  left_join(nam_cat, ., by="FAOCODE") %>%
+  left_join(.,GAEZ_SPAM_crp, by= "No_spam") %>%
+  write.csv(here("Input_data", "model_crops_incomplete.csv")) # NB: write.csv() returns NULL, so model_dat only triggers the export
+
+# This data frame is a complete mapping of GAEZ, SPAM and FAO (FAO original and CIFOS model dat FAO crop names)
+# The FAO crop names differ slightly between the model data and the FAO crops that I extracted from FAOSTAT.
+# Crop others were adjusted according to SPAM indication and the GAEZ mapped crops (cereal others = oats, etc.)
+
+dat_crp_map <- read_csv(here("Input_data", "model_crops_mapping_GAEZ_SPAM_FAO_complete.csv"))
+# dat_crp_map2 <- read_csv(here("Input_data", "model_crops_incomplete.csv"))
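+
+# A quick completeness check before relying on the mapping (a sketch; assumes
+# dat_crp_map keeps the CIFOS crop names in a column `crop`, as renamed below):
+# setdiff(FAO_CIFOS_crp$crop, dat_crp_map$crop) # character(0) = complete match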
+
+#Validation > Result = Complete match!
+# dat_crp_map2$crop_cifos %in% FAO_CIFOS_crp$crop
+
+dat_crp_map1 = dat_crp_map %>%
+  rename(crop_cifos = crop) %>%
+  left_join(.,food_cat) #adding food categories (food/non-food)
+
+dat_crp_map1$Usage[42]="food" #adding the food category to treenuts
+write.csv(dat_crp_map1, here("Input_data", "crop_name_mapping_complete.csv")) # this file can be used for further mapping of the crops
+dat_crp_map=read_csv(here("Input_data", "crop_name_mapping_complete.csv"))
+
+# Aggregating to ADM1 level EU -----------------------------------------------
+
+# Pxl EU27 data ----------------------------------------------------------
+
+dat.pxl.EU = read_csv(here("Input_data", "pxl_rec_tec_gaul_spam_tnuts_EU_27_corr_prod.csv"))
+dat_TA_yld = read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv")) %>%
+  dplyr::select(alloc_key,name_cntr,name_adm1, iso3, x, y) %>%
+  mutate(alloc_key = str_sub(alloc_key, 2))
+
+# Mapping countries
+FAOcntr <- read_csv(here("Input_data", 'FAO_countries.csv'))
+FAOcntr <- dplyr::select(FAOcntr, 'ISO3', 'FAOSTAT_CODE', "GAUL_CODE", 'Continent', 'Subcontinent') %>%
+  rename(iso3=ISO3)
+dat_EU27 = read_csv(here("Input_data", "Country_name_mapping_incomplete.csv")) %>%
+  left_join(.,FAOcntr, by="iso3")
+
+dat_crop_nam = read_csv(here("Input_data", "crop_name_mapping_complete.csv")) %>%
+  dplyr::select(-c(X1,X1_1)) %>%
+  rename(crops = 'SPAM short name')
+
+# Transform pixel data so that it has SPAM variables (adm1 level)
+dat.pixel.EU = dat.pxl.EU %>%
+  mutate_at(vars(alloc_key), funs(as.character)) %>%
+  left_join(.,dat_TA_yld, by="alloc_key") %>%
+  dplyr::select(alloc_key, iso3.x, name_cntr.x, name_adm1.x, G2008_1_ID,
+                ADM0_CODE, ADM0_NAME, ADM1_CODE, ADM1_NAME, x, y, crops, tech_type) %>%
+  rename(iso3=iso3.x,
+         name_cntr=name_cntr.x,
+         name_adm1=name_adm1.x) %>%
+  left_join(.,dat_crop_nam, by="crops") %>%
+  left_join(.,dat_EU27, by="iso3") %>%
+  dplyr::select(G2008_1_ID, tech_type, CIFOS_cntr_nam, crop_cifos, crops)
+
+# df with prod_level and Cifos country EU
+EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA",
+            "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR")
+
+dat_cntr_EU = dat.pxl.EU %>%
+  mutate_at(vars(alloc_key), funs(as.character)) %>%
+  left_join(.,dat_TA_yld, by="alloc_key") %>%
+  dplyr::select(iso3.x, name_adm1.x) %>%
+  dplyr::distinct_at(vars('iso3.x', "name_adm1.x"), .keep_all = F) %>%
+  rename(iso3=iso3.x,
+         name_adm1=name_adm1.x) %>%
+  filter(iso3 %in% EU_iso3) %>%
+  left_join(.,dat_EU27, by="iso3") %>%
+  dplyr::select(iso3, name_adm1, CIFOS_cntr_nam, Continent, Subcontinent) # %>%
+dat_cntr_EU %>% write_csv(here("Input_data", "dat_cntr_EU.csv"))
+
+# rename(
+#   iso3=iso3.x,
+#   name_adm1=name_adm1.x)
+
+# df with crops and cifos crops
+dat_crop_nam1 = read_csv(here("Input_data", "crop_name_mapping_complete.csv")) %>%
+  dplyr::select('SPAM short name', FAONAMES, crop_cifos) %>%
+  rename(crops = 'SPAM short name')
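+
+# Joining by adm1 *name* alone can attach rows from identically named regions in
+# non-EU countries (a plausible source of the stray ALB rows flagged below). A
+# sketch to spot such name collisions before joining (same objects as above):
+# dat_cntr_EU %>% dplyr::count(name_adm1) %>% dplyr::filter(n > 1)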
+
+# Aggregating H,A,P and calculating Yields per Adm1 level
+dat.adm1.EU = dat.pxl.EU %>%
+  mutate_at(vars(alloc_key), funs(as.character)) %>%
+  # left_join(., dat.pxl.EU, by="ADM1_NAME") %>%
+  # mutate_at(vars(alloc_key), funs(as.character)) %>%
+  left_join(.,dat_TA_yld, by="alloc_key") %>%
+  dplyr::select(iso3.x, name_cntr.x, name_adm1.x, ADM0_NAME, ADM1_NAME, tech_type, crops, A, H, P) %>%
+  group_by(iso3.x, name_adm1.x, tech_type, crops) %>%
+  summarize(across(.cols = A:P, .fns = sum)) %>% #all three rec types are summed per adm1 (prod_level) and crop
+  mutate(Y = (P*1000/H)) %>% # P is in metric tonnes; *1000 converts to kg, so Y is kg/ha
+  mutate_at(vars(Y), ~replace(., is.nan(.), 0)) #%>%
+
+# This is the sheet that contains the parameters that are important for the GAMS input file.
+# It contains yield, harvested and physical area per country, adm1 unit and crop.
+EU27_crp_adm1 = dat.adm1.EU %>%
+  left_join(.,dat_cntr_EU, by=c("name_adm1.x"="name_adm1")) %>%
+  filter(iso3 %in% EU_iso3) %>%
+  left_join(.,dat_crop_nam1, by="crops") %>%
+  dplyr::select(iso3, FAONAMES, Subcontinent, Continent, CIFOS_cntr_nam, name_adm1, crop_cifos, A, H, P, Y) %>%
+  write.csv(here("Input_data", "EU27_crp_adm1.csv")) # Final crop yield, area, etc. file.
+
+# Validate
+EU27_crp_adm1 <- read_csv("C:/Users/simon083/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Modified_scripts/R/CifoS_Crop/Input_data/EU27_crp_adm1.csv")
+View(EU27_crp_adm1)
+
+length(unique(EU27_crp_adm1$iso3)) # Correct! 27 countries present.
+anyNA(EU27_crp_adm1) # quick NA check (is.na() on the whole df would print a huge matrix)
+# Continue: Why is ALB in the final dataset? Check where this went wrong
+# Validate Yields. Sum the ADM1 by hand. Check if it matches
+# Check whether that makes sense > 319652 rows?!
+
+# Then add again the GADM and get values for the SOC% >> Add the peat/non-peat to it.
+# Add usage to the sheet (or single sheet)
+#
+
+# Bringing back some SPAM geo-variables to compare them with GADM
+dat.agg.EU = dat_TA_yld %>%
+  dplyr::select(iso3, alloc_key, x, y, name_cntr, name_adm1) %>%
+  right_join(.,dat.pxl.EU, by="alloc_key")
+
+# exploratory scraps, left unfinished (commented out so the script still runs):
+# dat.agg.EU %>% dplyr::select(iso3)
+# dat.pxl.EU %>%
+#   # dplyr::select(-Y) #%>%
+#   group_by(GID_1) %>%
+#   summarize
+# summarise()
+max(dat.pxl.EU$Y)
+
+# rule-based validation; validator()/confront() come from the 'validate' package (not in the package list above)
+v <- validator()
+summary(v)
+cf <- confront(dat.pxl.EU, v)
+summary(cf)
+?confront
+
+# creating a tidy table with all tech variables in one df
+list.comb <- list(yld_dat, prd_dat, har_dat, phy_dat) # the object read above is prd_dat (was written prod_dat)
+list.comb2 = list(dat_har, dat_phy, dat_yld, dat_prd) # last element was 'da'; presumably dat_prd was meant
+dat.comb <- do.call("rbind", list.comb)
+# write.csv(dat.comb, 'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/dat_comb_tidy.csv', row.names=T)
+
+# For ArcGIS - area physical per pixel --------------------------
+setwd(here("Input_data"))
+# Physical area --------------------------------
+dat_TA_pa <- read_csv("spam2010V2r0_global_A_TA.csv")
+dat_TA_pa %>% dplyr::select(alloc_key, x, y) %>% write_csv(here("Input_data", "coordinates_SPAM.csv"))
+
+Phys_area_full_cropspread <- read_csv("Rectypes_full_cropspread/Phys_area_full_cropspread.csv")
+x = Phys_area_full_cropspread %>%
+  dplyr::rowwise() %>%
+  dplyr::mutate(phys_area_pixl = sum(dplyr::c_across(whea:rest), na.rm = T))
+
+x2 = x %>%
+  dplyr::select(alloc_key, x, y, rec_type, tech_type, phys_area_pixl) %>%
+  group_by(alloc_key) %>%
+  mutate(sum_phys_area_pixel = sum(phys_area_pixl))
+
+unique_coord = x2 %>%
+  distinct_at(vars(alloc_key, sum_phys_area_pixel)) %>%
+  left_join(., Phys_area_full_cropspread, by="alloc_key") %>%
+  dplyr::select(alloc_key, x, y, sum_phys_area_pixel) %>%
+  distinct_at(vars(alloc_key, x, y, sum_phys_area_pixel))
+
+write_csv(unique_coord, here("Input_data", "physical_area_per_pixel.csv")) # write("unique_coord", ...) would only write the literal string, not the data
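+
+# rowwise() + c_across() is slow on ~1M SPAM pixels; rowSums() over the same
+# columns is equivalent and vectorised (a sketch, same objects as above):
+# x <- Phys_area_full_cropspread %>%
+#   dplyr::mutate(phys_area_pixl = rowSums(dplyr::across(whea:rest), na.rm = TRUE))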
+
+# loading crop usage sheet --------------------------------------------------------------
+setwd(here("Input_data"))
+food_cat <- read_csv('foodnonfood.csv')
+# Loading crop name sheets
+nam_cat <- read_csv('crop_names.csv')
+# loading FAO country codes
+FAOcntr <- read_csv(here("Input_data", 'FAO_countries.csv'))
+FAOcntr <- dplyr::select(FAOcntr, 'ISO3', 'FAOSTAT_CODE', "GAUL_CODE", 'Continent', 'Subcontinent')
+
+# joining all together
+names(dat.comb)[8] <- "Crop_short"
+names(nam_cat)[2] <- "Crop_short"
+names(dat.comb)[2] <- "ISO3"
+
+join.use <- left_join(dat.comb, food_cat, by="Crop_short")
+join_FAOcntr <- left_join(join.use, FAOcntr, by="ISO3")
+SPAM_dat_tidy <- left_join(join_FAOcntr, nam_cat, by='Crop_short')
+
+# ddply() below comes from the 'plyr' package, which is not in the package list above
+AggregateFun_mean <- function(z){ #for yield
+  ddply(z, .(name_adm1, crop),
+        function(z) mean(z$valu[z$valu!=0]))
+}
+
+AggregateFun_sum <- function(g){ #for area
+  ddply(g, .(name_adm1, crop),
+        function(g) sum(g$valu))
+}
+
+CollapseFun <- function(h){
+  h %>% dplyr::distinct_at(vars('name_adm1', 'crop'), .keep_all = TRUE)
+}
+
+CombineFun <- function(d,s){
+  full_join(d,s, by=c("name_adm1", 'crop'))
+}
+
+# rename and reorder dataset columns
+Clean2Fun <- function(x){
+  i <- x %>% dplyr::rename(SPAMcrop_short=Crop_short,
+                           SPAMcrop_long=`SPAM long name`,
+                           use=Usage) %>%
+    dplyr::select(-c(`No. crt.`, ID)) %>%
+    arrange(name_adm1, SPAMcrop_short, rec_type, tech_type)
+  c <- i[c('ISO3', 'name_cntr', 'name_adm1', 'alloc_key', 'FAOSTAT_CODE', 'GAUL_CODE', 'SPAMcrop_short', 'Continent', 'Subcontinent', 'SPAMcrop_long', 'FAONAMES', 'FAOCODE', 'GROUP', 'use', 'rec_type', 'tech_type', 'unit', 'valu')]
+}
+SPAM_dat_tidy <- Clean2Fun(SPAM_dat_tidy)
+
+# Save file - Option 1: long dataset
+write.csv(SPAM_dat_tidy, here("Input_data", "Yld_tidy_all.csv"), row.names=T)
+
+# Option 2 - Harvested and Physical area horizontal
+SprdFun <- function(x){
+  t <- x %>% dplyr::select(-c(unit)) %>%
+    spread(rec_type, valu) %>%
+    dplyr::rename(yield_kgha=Y,
+                  phys_area_ha=A,
+                  harv_area_ha=H)
+  c <- t[c('Continent', 'Subcontinent', 'ISO3', 'name_cntr', 'name_adm1', 'alloc_key', 'FAOSTAT_CODE', 'GAUL_CODE', 'SPAMcrop_short', 'SPAMcrop_long', 'FAONAMES', 'FAOCODE', 'GROUP', 'use', 'tech_type', 'phys_area_ha', 'harv_area_ha', 'yield_kgha')]
+}
+
+sprd_dat <- SprdFun(SPAM_dat_tidy)
+write.csv(sprd_dat, here("Input_data","SPAMspread_all_fixd.csv"), row.names=T)
+
+
diff --git a/Treenuts/TreenutsGAUL.R b/Treenuts/TreenutsGAUL.R
new file mode 100644
index 0000000000000000000000000000000000000000..2d1c34659713fbb7694d01e2cd212126e133f707
--- /dev/null
+++ b/Treenuts/TreenutsGAUL.R
@@ -0,0 +1,975 @@
+easypackages::packages(c("tidyverse", 'sf', 'raster','sp','dplyr','spatialEco',
+                         'exactextractr','tibble', 'here', "readr", "magrittr","tidyr",
+                         "readxl", "sfheaders"))
+
+# New approach -----------------------------------------------------------
+# Idea:
+# 1. Load all the harvested area and yields (GeoTIFF)
+# 2. Extract to point of the Spam cells
+# 3.
+# 4.
Then look at the proportion between Treenuts and the rest of spam crop others for each pixel + +# Zonal statistics ------------------------------------------------------- +wd=setwd(here("Input_data")) + +# Load and check shapefile +shape_gadm=read_sf(dsn = wd, layer = "gadm36") # multipolygons containing all admin levels globally +shape_gadm=dplyr::select(shape_gadm, 'UID', 'geometry', 'GID_0', 'GID_1') + +shape_gaul=read_sf(here("Input_data", "g2008_1.shp")) #actual GAUL map +shape_gaul=dplyr::select(shape_gaul, 'G2008_1_ID', 'ADM1_CODE', "ADM0_CODE", 'ADM0_NAME', 'ADM1_NAME', "CONTINENT", "REGION", "geometry") +# class(shape_gadm) +# shape_fil=filter(shape, GID_0 == 'FIN') +dat_spam=read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv")) +dat_spam=dplyr::select(dat_spam, x,y, alloc_key, name_adm1, iso3) + +# Make SPAM spatial +# projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0" +# proj_mercator = "+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs" +proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below +proj_meter = "+datum=WGS84 +units=m +no_defs" +dat_cent <- st_as_sf(x = dat_spam, + coords = c("x", "y"), + crs = proj) + + +# Load treenuts and rest files +{ +# ylds - Nuts +rast_walnut_yld = raster(here("Input_data","walnut_YieldPerHectare.tif")) +{ + +# class(rast_walnut_yld) + +# rast_walnut_withoutNA = rast_walnut_yld +# # +# mean(rast_walnut_yld$walnut_YieldPerHectare) +# mean(rast_walnut_withoutNA$walnut_YieldPerHectare) +# rast_walnut_withoutNA[rast_walnut_withoutNA$walnut_YieldPerHectare == 0] <- NA +# check=as.raster(rast_walnut_withoutNA) +# +# cellStats(rast_walnut_withoutNA$walnut_YieldPerHectare, mean) +# +# cellStats(rast_walnut_withoutNA, mean) +# cellStats(rast_walnut_yld, mean) +# rast_walnut_withoutNA$walnut_YieldPerHectare +} +rast_almond_yld = raster(here("Input_data","almond_YieldPerHectare.tif")) +rast_chestnut_yld = raster(here("Input_data","chestnut_YieldPerHectare.tif")) +rast_cashew_yld = raster(here("Input_data","cashew_YieldPerHectare.tif")) +rast_pistachio_yld = raster(here("Input_data","pistachio_YieldPerHectare.tif")) +rast_kolanut_yld = raster(here("Input_data", "kolanut_YieldPerHectare.tif")) +rast_hazelnut_yld = raster(here("Input_data","hazelnut_YieldPerHectare.tif")) +rast_brazil_yld = raster(here("Input_data","brazil_YieldPerHectare.tif")) +rast_areca_yld = raster(here("Input_data","areca_YieldPerHectare.tif")) +rast_nutnes_yld = raster(here("Input_data","nutnes_YieldPerHectare.tif")) +{ +# 0's to Na's +# rast_almond_yldNA=rast_almond_yld[rast_almond_yld$almond_YieldPerHectare == 0] <- NA + +# rast_almond_yld[rast_almond_yld$almond_YieldPerHectare == 0] <- NA + +# cellStats(rast_walnut_withoutNA$walnut_YieldPerHectare, min) +# cellStats(rast_walnut_yld$walnut_YieldPerHectare, min) + +# rast_almond_yld +} +# yld - Rest +rast_anise_yld = raster(here("Input_data","aniseetc_YieldPerHectare.tif")) +rast_chili_yld = raster(here("Input_data","chilleetc_YieldPerHectare.tif")) +rast_cinnamon_yld = raster(here("Input_data","cinnamon_YieldPerHectare.tif")) +rast_ginger_yld = raster(here("Input_data",'ginger_YieldPerHectare.tif')) +rast_hop_yld = raster(here("Input_data",'hop_YieldPerHectare.tif')) +rast_mate_yld = raster(here("Input_data",'mate_YieldPerHectare.tif')) +rast_nutmeg_yld = raster(here("Input_data","nutmeg_YieldPerHectare.tif")) +rast_pepper_yld = raster(here("Input_data",'pepper_YieldPerHectare.tif')) +rast_peppermint_yld = 
raster(here("Input_data",'peppermint_YieldPerHectare.tif')) +rast_pyreth_yld = raster(here("Input_data",'pyrethrum_YieldPerHectare.tif')) +rast_ramie_yld = raster(here("Input_data",'ramie_YieldPerHectare.tif')) +rast_ruber_yld = raster(here("Input_data",'rubber_YieldPerHectare.tif')) +rast_spices_yld = raster(here("Input_data",'spicenes_YieldPerHectare.tif')) +rast_sugarnes_yld = raster(here("Input_data",'sugarnes_YieldPerHectare.tif')) +rast_vanilla_yld = raster(here("Input_data",'vanilla_YieldPerHectare.tif')) + +# Production - treenuts + +rast_walnut_pd = raster(here("Input_data","walnut_Production.tif")) +rast_almond_pd = raster(here("Input_data","almond_Production.tif")) +rast_chestnut_pd = raster(here("Input_data","chestnut_Production.tif")) +rast_cashew_pd = raster(here("Input_data","cashew_Production.tif")) +rast_pistachio_pd = raster(here("Input_data","pistachio_Production.tif")) +rast_kolanut_pd = raster(here("Input_data", "kolanut_Production.tif")) +rast_hazelnut_pd = raster(here("Input_data","hazelnut_Production.tif")) +rast_brazil_pd = raster(here("Input_data","brazil_Production.tif")) +rast_areca_pd = raster(here("Input_data","areca_Production.tif")) +rast_nutnes_pd = raster(here("Input_data","nutnes_Production.tif")) +# Production -rest +rast_anise_pd = raster(here("Input_data","aniseetc_Production.tif")) +rast_chili_pd = raster(here("Input_data","chilleetc_Production.tif")) +rast_cinnamon_pd = raster(here("Input_data","cinnamon_Production.tif")) +rast_ginger_pd = raster(here("Input_data",'ginger_Production.tif')) +rast_hop_pd = raster(here("Input_data",'hop_Production.tif')) +rast_mate_pd = raster(here("Input_data",'mate_Production.tif')) +rast_nutmeg_pd = raster(here("Input_data","nutmeg_Production.tif")) +rast_pepper_pd = raster(here("Input_data",'pepper_Production.tif')) +rast_peppermint_pd = raster(here("Input_data",'peppermint_Production.tif')) +rast_pyreth_pd = raster(here("Input_data",'pyrethrum_Production.tif')) +rast_ramie_pd = raster(here("Input_data",'ramie_Production.tif')) +rast_ruber_pd = raster(here("Input_data",'rubber_Production.tif')) +rast_spices_pd = raster(here("Input_data",'spicenes_Production.tif')) +rast_sugarnes_pd = raster(here("Input_data",'sugarnes_Production.tif')) +rast_vanilla_pd = raster(here("Input_data",'vanilla_Production.tif')) + + +# harvested area - treentus +rast_walnut_ha = raster(here("Input_data","walnut_HarvestedAreaHectares.tif")) +rast_almond_ha = raster(here("Input_data","almond_HarvestedAreaHectares.tif")) +rast_chestnut_ha = raster(here("Input_data","chestnut_HarvestedAreaHectares.tif")) +rast_cashew_ha = raster(here("Input_data","cashew_HarvestedAreaHectares.tif")) +rast_pistachio_ha = raster(here("Input_data","pistachio_HarvestedAreaHectares.tif")) +rast_kolanut_ha = raster(here("Input_data", "kolanut_HarvestedAreaHectares.tif")) +rast_hazelnut_ha = raster(here("Input_data","hazelnut_HarvestedAreaHectares.tif")) +rast_brazil_ha = raster(here("Input_data","brazil_HarvestedAreaHectares.tif")) +rast_areca_ha = raster(here("Input_data","areca_HarvestedAreaHectares.tif")) +rast_nutnes_ha = raster(here("Input_data","nutnes_HarvestedAreaHectares.tif")) +# HarvestedAreaHectares -rest +rast_anise_ha = raster(here("Input_data","aniseetc_HarvestedAreaHectares.tif")) +rast_chili_ha = raster(here("Input_data","chilleetc_HarvestedAreaHectares.tif")) +rast_cinnamon_ha = raster(here("Input_data","cinnamon_HarvestedAreaHectares.tif")) +rast_ginger_ha = raster(here("Input_data",'ginger_HarvestedAreaHectares.tif')) +rast_hop_ha = 
raster(here("Input_data",'hop_HarvestedAreaHectares.tif')) +rast_mate_ha = raster(here("Input_data",'mate_HarvestedAreaHectares.tif')) +rast_nutmeg_ha = raster(here("Input_data","nutmeg_HarvestedAreaHectares.tif")) +rast_pepper_ha = raster(here("Input_data",'pepper_HarvestedAreaHectares.tif')) +rast_peppermint_ha = raster(here("Input_data",'peppermint_HarvestedAreaHectares.tif')) +rast_pyreth_ha = raster(here("Input_data",'pyrethrum_HarvestedAreaHectares.tif')) +rast_ramie_ha = raster(here("Input_data",'ramie_HarvestedAreaHectares.tif')) +rast_ruber_ha = raster(here("Input_data",'rubber_HarvestedAreaHectares.tif')) +rast_spices_ha = raster(here("Input_data",'spicenes_HarvestedAreaHectares.tif')) +rast_sugarnes_ha = raster(here("Input_data",'sugarnes_HarvestedAreaHectares.tif')) +rast_vanilla_ha = raster(here("Input_data",'vanilla_HarvestedAreaHectares.tif')) +} + +# yld_list=list(rast_almond_yld, rast_cashew_yld, rast_chestnut_yld, rast_hazelnut_yld, rast_kolanut_yld, rast_pistachio_yld, rast_walnut_yld) +# stack_yld=stack(rast_almond_yld, rast_cashew_yld, rast_chestnut_yld, rast_hazelnut_yld, rast_kolanut_yld, rast_pistachio_yld, rast_walnut_yld) +# stack_ha=stack(rast_almond_ha, rast_cashew_ha, rast_chestnut_ha, rast_hazelnut_ha, rast_kolanut_ha, rast_pistachio_ha, rast_walnut_ha) +# +# dat$mean.nfertilizer_global +# summary(dat$mean.nfertilizer_global) +# + +# Perform zonal statistics ------------------------------------------------ +# FunZone_yld=function(rast,shape, na.rm=T){ # raster and vector +# c=zonal.stats(shape, rast, stats = c("Fun_nzmean")) +# c$UID <- shape$UID # assigning the identifier of the original df to the extracted df +# merge(shape, c, by.x = 'UID', by.y = 'UID') # merge the two files together +# } +# GADM +FunZone_sum=function(rast){ # raster and vector + c=zonal.stats(shape, rast, stats = c("sum")) + c$UID <- shape$UID # assigning the identifier of the original df to the extracted df + dat <- merge(shape, c, by.x = 'UID', by.y = 'UID') # merge the two files together +} + +# GAUL +FunZone_sum=function(rast){ # raster and vector + c=zonal.stats(shape_gaul, rast, stats = c("sum")) + c$G2008_1_ID <- shape_gaul$G2008_1_ID # assigning the identifier of the original df to the extracted df + dat <- merge(shape_gaul, c, by.x = 'G2008_1_ID', by.y = 'G2008_1_ID') # merge the two files together +} + +# +# original ---------------------------------------------------------------- +FunZone=function(rast, shape, na.rm=T){ # raster and vector + c=zonal.stats(shape, rast, stats = c("mean")) + c$UID <- shape$UID # assigning the identifier of the original df to the extracted df + dat <- merge(shape, c, by.x = 'UID', by.y = 'UID') # merge the two files togethere +} + +# # test ignoring 0s -------------------------------------------------------- +# nzmean <- function(x) { +# zvals <- x==0 +# if (all(zvals)) 0 else mean(x[!zvals]) +# } +# +# Fun_nzmean=function(r, na.rm=F){ +# mean(r[r!=0]) +# } +# +# FunZone_yld_0=function(rast,shape){ # raster and vector +# c=zonal.stats(shape, rast, stats = c("nzmean")) +# c$UID <- shape$UID # assigning the identifier of the original df to the extracted df +# merge(shape, c, by.x = 'UID', by.y = 'UID') # merge the two files together +# } +# +# # Test raster +# v2=raster::extract(rast_walnut_yld,shape_fil,fun='Fun_nzmean') + + +# Yields ------------------------------------------------------------------ +# Apply zonal stats to treenuts +ex_walnut_yld = FunZone_yld(rast_walnut_yld,shape_fil) +ex_walnut_yld_0 = FunZone_yld(rast_walnut_yld,shape_fil) + +# 
mean(ex_walnut_yld$mean.walnut_YieldPerHectare) +# mean(ex_walnut_yld_0$mean.walnut_YieldPerHectare) +# mean(ex_walnut_withNA_FIn$mean.walnut_YieldPerHectare) + +ex_walnut_withNA_FIn = FunZone_yld(rast_walnut_withoutNA,shape_fil) +ex_walnut_yld$nzmean.walnut_YieldPerHectare +# max(ex_walnut_withNA_FIn$mean.walnut_YieldPerHectare) + + +ex_almond_yld = FunZone_yld(rast_almond_yld) +ex_chestnut_yld = FunZone_yld(rast_chestnut_yld) +ex_cashew_yld = FunZone_yld(rast_cashew_yld) +ex_pistachio_yld = FunZone_yld(rast_pistachio_yld) +ex_kolanut_yld = FunZone_yld(rast_kolanut_yld) +ex_hazelnut_yld = FunZone_yld(rast_hazelnut_yld) +ex_brazil_yld = FunZone_yld(rast_brazil_yld) +ex_areca_yld = FunZone_yld(rast_areca_yld) +ex_nutnes_yld = FunZone_yld(rast_nutnes_yld) + +# Apply zonal stats to rest +ex_anise_yld = FunZone_yld(rast_anise_yld) +ex_chili_yld = FunZone_yld(rast_chili_yld) +ex_cinnamon_yld =FunZone_yld(rast_cinnamon_yld) +ex_ginger_yld = FunZone_yld(rast_ginger_yld) +ex_hop_yld = FunZone_yld(rast_hop_yld) +ex_mate_yld = FunZone_yld(rast_mate_yld) +ex_nutmeg_yld = FunZone_yld(rast_nutmeg_yld) +ex_pepper_yld = FunZone_yld(rast_pepper_yld) +ex_peppermint_yld = FunZone_yld(rast_peppermint_yld) +ex_pyreth_yld = FunZone_yld(rast_pyreth_yld) +ex_ramie_yld = FunZone_yld(rast_ramie_yld) +ex_ruber_yld = FunZone_yld(rast_ruber_yld) +ex_spices_yld = FunZone_yld(rast_spices_yld) +ex_sugarnes_yld = FunZone_yld(rast_sugarnes_yld) +ex_vanilla_yld = FunZone_yld(rast_vanilla_yld) + +# Join columns (cbind) +lst.ex.yld = list(ex_walnut_yld,ex_almond_yld,ex_chestnut_yld,ex_cashew_yld,ex_pistachio_yld, ex_kolanut_yld, ex_hazelnut_yld, ex_areca_yld, ex_brazil_yld, ex_nutnes_yld, + ex_anise_yld, ex_chili_yld, ex_cinnamon_yld, ex_ginger_yld, ex_hop_yld, ex_mate_yld, ex_nutmeg_yld, ex_pepper_yld, ex_peppermint_yld, ex_pyreth_yld, ex_ramie_yld, ex_ruber_yld, ex_spices_yld, ex_sugarnes_yld, ex_vanilla_yld) +dat_ex.yld = do.call("cbind", lst.ex.yld) # merging all tech types +# head(dat_ex.yld) + +treenut_yld = dat_ex.yld %>% + select(UID, GID_0,GID_1,mean.walnut_YieldPerHectare, + mean.almond_YieldPerHectare, + mean.chestnut_YieldPerHectare, + mean.cashew_YieldPerHectare, + mean.pistachio_YieldPerHectare, + mean.kolanut_YieldPerHectare, + mean.hazelnut_YieldPerHectare, + mean.areca_YieldPerHectare, + mean.brazil_YieldPerHectare, + mean.nutnes_YieldPerHectare, + geometry) %>% + rowwise()%>% + mutate(treenut_yld= rowmean(c_across(mean.almond_YieldPerHectare:mean.areca_YieldPerHectare)))%>% + mutate(treenut_yld= rowmean(c_across(mean.anise_YieldPerHectare:mean.vanilla_YieldPerHectare))) + + +# class(treenut_yld) +# write.csv(treenut_yld, 'D:/Raw_dat/Raw_dat/Ramakutty/HarvestedAreaYield175Crops_Geotiff/HarvestedAreaYield175Crops_Geotiff/Treenuts', row.names=T) +st_write(treenut_yld, here("Input_data", "GADM_treenut.shp"), driver="ESRI Shapefile") # create to a shapefile + + +# Production ------------------------------------------------------------- + +ex_walnut_pd = FunZone_sum(rast_walnut_pd) +ex_almond_pd = FunZone_sum(rast_almond_pd) +ex_chestnut_pd = FunZone_sum(rast_chestnut_pd) +ex_cashew_pd = FunZone_sum(rast_cashew_pd) +ex_pistachio_pd = FunZone_sum(rast_pistachio_pd) +ex_kolanut_pd = FunZone_sum(rast_kolanut_pd) +ex_hazelnut_pd = FunZone_sum(rast_hazelnut_pd) +ex_brazil_pd = FunZone_sum(rast_brazil_pd) +ex_areca_pd = FunZone_sum(rast_areca_pd) +ex_nutnes_pd = FunZone_sum(rast_nutnes_pd) + +# Rest +ex_anise_pd = FunZone_sum(rast_anise_pd) +ex_chili_pd = FunZone_sum(rast_chili_pd) +ex_cinnamon_pd 
=FunZone_sum(rast_cinnamon_pd) +ex_ginger_pd = FunZone_sum(rast_ginger_pd) +ex_hop_pd = FunZone_sum(rast_hop_pd) +ex_mate_pd = FunZone_sum(rast_mate_pd) +ex_nutmeg_pd = FunZone_sum(rast_nutmeg_pd) +ex_pepper_pd = FunZone_sum(rast_pepper_pd) +ex_peppermint_pd = FunZone_sum(rast_peppermint_pd) +ex_pyreth_pd = FunZone_sum(rast_pyreth_pd) +ex_ramie_pd = FunZone_sum(rast_ramie_pd) +ex_ruber_pd = FunZone_sum(rast_ruber_pd) +ex_spices_pd = FunZone_sum(rast_spices_pd) +ex_sugarnes_pd = FunZone_sum(rast_sugarnes_pd) +ex_vanilla_pd = FunZone_sum(rast_vanilla_pd) + +# list all treenuts +lst.ex.pd = list(ex_walnut_pd,ex_almond_pd,ex_chestnut_pd,ex_cashew_pd,ex_pistachio_pd, ex_kolanut_pd, ex_hazelnut_pd, ex_areca_pd, ex_nutnes_pd, + ex_anise_pd, ex_chili_pd, ex_cinnamon_pd, ex_ginger_pd, ex_hop_pd, ex_mate_pd, ex_nutmeg_pd, ex_pepper_pd, ex_peppermint_pd, ex_pyreth_pd, ex_ramie_pd, ex_ruber_pd, ex_spices_pd, ex_sugarnes_pd, ex_vanilla_pd) +dat_ex.pd = do.call("cbind", lst.ex.pd) # merging all tech types + +# list rest + +treenut_pd = dat_ex.pd %>% + # dplyr::select(UID, GID_0,GID_1,sum.walnut_Production, + dplyr::select(G2008_1_ID, ADM1_CODE,ADM0_CODE, ADM1_NAME,ADM0_NAME, CONTINENT, REGION, + sum.walnut_Production, + sum.almond_Production, + sum.chestnut_Production, + sum.cashew_Production, + sum.pistachio_Production, + sum.kolanut_Production, + sum.hazelnut_Production, + sum.aniseetc_Production, + sum.chilleetc_Production, + sum.cinnamon_Production, + sum.ginger_Production, + sum.hop_Production, + sum.mate_Production, + sum.nutmeg_Production, + sum.pepper_Production, + sum.peppermint_Production, + sum.pyrethrum_Production, + sum.ramie_Production, + sum.rubber_Production, + sum.spicenes_Production, + sum.sugarnes_Production, + sum.vanilla_Production, + geometry) %>% + group_by(ADM1_CODE, ADM0_CODE) %>% + mutate(treenut_pd= sum(sum.walnut_Production, + sum.almond_Production, + sum.chestnut_Production, + sum.cashew_Production, + sum.pistachio_Production, + sum.kolanut_Production, + sum.hazelnut_Production))%>% + mutate(rest_pd = sum(sum.aniseetc_Production, + sum.chilleetc_Production, + sum.cinnamon_Production, + sum.ginger_Production, + sum.hop_Production, + sum.mate_Production, + sum.nutmeg_Production, + sum.pepper_Production, + sum.peppermint_Production, + sum.pyrethrum_Production, + sum.ramie_Production, + sum.rubber_Production, + sum.spicenes_Production, + sum.sugarnes_Production, + sum.vanilla_Production)) %>% + dplyr::select(G2008_1_ID, ADM1_CODE,ADM0_CODE, ADM1_NAME, ADM0_NAME, CONTINENT, REGION, treenut_pd, rest_pd) + + +# class(treenut_yld) +# write.csv(treenut_yld, 'D:/Raw_dat/Raw_dat/Ramakutty/HarvestedAreaYield175Crops_Geotiff/HarvestedAreaYield175Crops_Geotiff/Treenuts', row.names=T) +st_write(treenut_pd, here("Input_data", "GAUL_treenut_rest_pd.shp"), driver="ESRI Shapefile") # create to a shapefile +treenut_pd= read_sf(here("Input_data", "GAUL_treenut_rest_pd,shp", "GAUL_treenut_rest_pd.shp")) + +# Harvested area - Treenuts ----------------------------------------------- + +ex_walnut_ha = FunZone_sum(rast_walnut_ha) +ex_almond_ha = FunZone_sum(rast_almond_ha) +ex_chestnut_ha = FunZone_sum(rast_chestnut_ha) +ex_cashew_ha = FunZone_sum(rast_cashew_ha) +ex_pistachio_ha = FunZone_sum(rast_pistachio_ha) +ex_kolanut_ha = FunZone_sum(rast_kolanut_ha) +ex_hazelnut_ha = FunZone_sum(rast_hazelnut_ha) +ex_brazil_ha = FunZone_sum(rast_brazil_ha) +ex_areca_ha = FunZone_sum(rast_areca_ha) +ex_nutnes_ha = FunZone_sum(rast_nutnes_ha) + +# Rest +ex_anise_ha = FunZone_sum(rast_anise_ha) +ex_chili_ha = 
FunZone_sum(rast_chili_ha)
+ex_cinnamon_ha = FunZone_sum(rast_cinnamon_ha)
+ex_ginger_ha = FunZone_sum(rast_ginger_ha)
+ex_hop_ha = FunZone_sum(rast_hop_ha)
+ex_mate_ha = FunZone_sum(rast_mate_ha)
+ex_nutmeg_ha = FunZone_sum(rast_nutmeg_ha)
+ex_pepper_ha = FunZone_sum(rast_pepper_ha)
+ex_peppermint_ha = FunZone_sum(rast_peppermint_ha)
+ex_pyreth_ha = FunZone_sum(rast_pyreth_ha)
+ex_ramie_ha = FunZone_sum(rast_ramie_ha)
+ex_ruber_ha = FunZone_sum(rast_ruber_ha)
+ex_spices_ha = FunZone_sum(rast_spices_ha)
+ex_sugarnes_ha = FunZone_sum(rast_sugarnes_ha)
+ex_vanilla_ha = FunZone_sum(rast_vanilla_ha)
+
+# list all treenuts (NB: ex_brazil_ha is computed above but missing from this list)
+lst.ex.ha = list(ex_walnut_ha,ex_almond_ha,ex_chestnut_ha,ex_cashew_ha,ex_pistachio_ha, ex_kolanut_ha, ex_hazelnut_ha, ex_areca_ha, ex_nutnes_ha,
+                 ex_anise_ha, ex_chili_ha, ex_cinnamon_ha, ex_ginger_ha, ex_hop_ha, ex_mate_ha, ex_nutmeg_ha, ex_pepper_ha, ex_peppermint_ha, ex_pyreth_ha, ex_ramie_ha, ex_ruber_ha, ex_spices_ha, ex_sugarnes_ha, ex_vanilla_ha)
+dat_ex.ha = do.call("cbind", lst.ex.ha) # column-binding all crop extracts
+
+# list rest
+
+treenut_ha = dat_ex.ha %>%
+  dplyr::select(G2008_1_ID, ADM1_CODE, ADM0_CODE, ADM1_NAME, ADM0_NAME, CONTINENT, REGION,
+                sum.walnut_HarvestedAreaHectares,
+                sum.almond_HarvestedAreaHectares,
+                sum.chestnut_HarvestedAreaHectares,
+                sum.cashew_HarvestedAreaHectares,
+                sum.pistachio_HarvestedAreaHectares,
+                sum.kolanut_HarvestedAreaHectares,
+                sum.hazelnut_HarvestedAreaHectares,
+                sum.aniseetc_HarvestedAreaHectares,
+                sum.chilleetc_HarvestedAreaHectares,
+                sum.cinnamon_HarvestedAreaHectares,
+                sum.ginger_HarvestedAreaHectares,
+                sum.hop_HarvestedAreaHectares,
+                sum.mate_HarvestedAreaHectares,
+                sum.nutmeg_HarvestedAreaHectares,
+                sum.pepper_HarvestedAreaHectares,
+                sum.peppermint_HarvestedAreaHectares,
+                sum.pyrethrum_HarvestedAreaHectares,
+                sum.ramie_HarvestedAreaHectares,
+                sum.rubber_HarvestedAreaHectares,
+                sum.spicenes_HarvestedAreaHectares,
+                sum.sugarnes_HarvestedAreaHectares,
+                sum.vanilla_HarvestedAreaHectares,
+                geometry) %>%
+  group_by(ADM0_CODE, ADM1_CODE) %>%
+  mutate(treenut_ha = sum(sum.walnut_HarvestedAreaHectares,
+                          sum.almond_HarvestedAreaHectares,
+                          sum.chestnut_HarvestedAreaHectares,
+                          sum.cashew_HarvestedAreaHectares,
+                          sum.pistachio_HarvestedAreaHectares,
+                          sum.kolanut_HarvestedAreaHectares,
+                          sum.hazelnut_HarvestedAreaHectares)) %>%
+  mutate(rest_ha = sum(sum.aniseetc_HarvestedAreaHectares,
+                       sum.chilleetc_HarvestedAreaHectares,
+                       sum.cinnamon_HarvestedAreaHectares,
+                       sum.ginger_HarvestedAreaHectares,
+                       sum.hop_HarvestedAreaHectares,
+                       sum.mate_HarvestedAreaHectares,
+                       sum.nutmeg_HarvestedAreaHectares,
+                       sum.pepper_HarvestedAreaHectares,
+                       sum.peppermint_HarvestedAreaHectares,
+                       sum.pyrethrum_HarvestedAreaHectares,
+                       sum.ramie_HarvestedAreaHectares,
+                       sum.rubber_HarvestedAreaHectares,
+                       sum.spicenes_HarvestedAreaHectares,
+                       sum.sugarnes_HarvestedAreaHectares,
+                       sum.vanilla_HarvestedAreaHectares)) %>%
+  dplyr::select(G2008_1_ID, ADM1_CODE, ADM0_CODE, ADM1_NAME, ADM0_NAME, CONTINENT, REGION, treenut_ha, rest_ha)
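+
+# After zonal.stats() each polygon is one row, so the grouped sum() over the
+# listed columns above works, but an explicit row-wise sum is easier to audit.
+# A sketch (treenut_ha_chk is a hypothetical check object, same column names):
+# treenut_ha_chk <- dat_ex.ha %>%
+#   dplyr::mutate(treenut_ha = rowSums(dplyr::across(c(
+#     sum.walnut_HarvestedAreaHectares, sum.almond_HarvestedAreaHectares,
+#     sum.chestnut_HarvestedAreaHectares, sum.cashew_HarvestedAreaHectares,
+#     sum.pistachio_HarvestedAreaHectares, sum.kolanut_HarvestedAreaHectares,
+#     sum.hazelnut_HarvestedAreaHectares)), na.rm = TRUE))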
+
+# Joining SPAM with GAUL -----------------------------------------------------------
+dat_spam=read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv"))
+dat_spam=dplyr::select(dat_spam, x,y, alloc_key, name_adm1, iso3, name_cntr)
+
+# Make SPAM spatial
+# projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
+# proj_mercator = "+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
+proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from the raster file below
+dat_cent <- st_as_sf(x = dat_spam,
+                     coords = c("x", "y"),
+                     crs = proj)
+
+shape_gaul=read_sf(here("Input_data", "g2008_1.shp")) #actual GAUL map
+
+# LOAD treenut production and harvested area
+treenut_ha= read_sf(here("Input_data", "GAUL_treenut_rest_ha.shp")) # path had a stray ",shp" folder; st_write above saved directly into Input_data
+treenut_pd= read_sf(here("Input_data", "GAUL_treenut_rest_pd.shp"))
+# write_sf(sph.all, dsn = here("Input_data", driver = "ESRI shapefile"))
+sph.prod.tib = treenut_pd %>%
+  as_tibble() %>%
+  dplyr::select(G2008_1_ID, ADM1_CODE, ADM0_CODE, ADM0_NAME, treenut_pd, rest_pd, geometry)
+
+sph.harv = treenut_ha %>%
+  as_tibble() %>%
+  dplyr::select(G2008_1_ID, ADM1_CODE, ADM0_CODE, ADM0_NAME, treenut_ha, rest_ha, geometry)
+# loading SPAM-GADM join
+
+# Creating a spatial join between the SPAM (pixel) and GAUL 2008
+SPAM_GAUL_JOIN = dat_cent %>%
+  st_join(., shape_gaul, join = st_within, left = T)
+st_write(SPAM_GAUL_JOIN, here("Input_data", "GAUL2008_SPAM_LINK"), driver="ESRI Shapefile") # From here go to script
+SPAM_GAUL_JOIN=read_sf(here("Input_data", "GAUL2008_SPAM_LINK"))
+
+# GAUL aggregation and transformation ------------------------------------
+
+# Independent -- Merging treenuts with SPAM -------------------------------
+# LOAD data from previous step (due to memory overuse)
+setwd(here('Input_data', "Rectypes_full_cropspread"))
+yld_dat <- read_csv('Yield_full_cropspread.csv')
+prd_dat <- read_csv('Production_full_cropspread.csv')
+har_dat <- read_csv('Harv_area_spatial_spread_SPAM.csv')
+phy_dat <- read_csv('Phys_area_full_cropspread.csv')
+
+# Aggregate results to adm1 level and calculate ratio between nuts and rest
+val_pd = treenut_pd %>%
+  group_by(ADM1_CODE, ADM0_CODE) %>%
+  summarise(sum_pd = sum(treenut_pd), # sum() has no 'keep' argument; the stray 'keep = T' would be coerced to 1 and added to the total
+            sum_rest_pd = sum(rest_pd))
+write_sf(val_pd, here("Input_data", "val_pd.shp"))
+
+val_pd=read_sf(dsn=here("Input_data", "val_pd.shp"))
+val_pd=rename(val_pd, ADM1_CODE = ADM1_CO, #when loading the data, run this rename (shapefile truncates field names)
+              ADM0_CODE = ADM0_CO,
+              sum_rest_pd = sm_rst_)
+
+val_ha = treenut_ha %>%
+  group_by(ADM1_CODE, ADM0_CODE) %>%
+  summarise(sum_ha = sum(treenut_ha),
+            sum_rest_ha = sum(rest_ha)) #%>%
+write_sf(val_ha, here("Input_data", "val_ha.shp"))
+
+val_ha=read_sf(here("Input_data", "val_ha.shp"))
+val_ha = rename(val_ha,
+                ADM1_CODE = ADM1_CO,
+                ADM0_CODE = ADM0_CO,
+                sum_rest_ha = sm_rst_)
+
+# Join of harvested area and production
+join_ha_pd_gaul = val_pd %>%
+  group_by(ADM1_CODE, ADM0_CODE) %>%
+  as_tibble() %>%
+  left_join(., val_ha, by="ADM1_CODE") %>%
+  left_join(., shape_gaul, by = "ADM1_CODE") %>%
+  rowwise() %>%
+  mutate(tnuts_ratio_pd = sum_pd/sum(sum_pd, sum_rest_pd),
+         tnuts_ratio_ha = sum_ha/sum(sum_ha, sum_rest_ha)) %>%
+  dplyr::select(G2008_1_ID, ADM0_CODE, ADM0_NAME, ADM1_NAME, CONTINENT, REGION,
+                sum_pd, sum_rest_pd, sum_ha, sum_rest_ha, tnuts_ratio_pd, tnuts_ratio_ha, geometry) #%>%
+st_write(join_ha_pd_gaul, here("Input_data", "GAUL_tnuts_rest_ratio_ha_pd"), driver="ESRI Shapefile") # st_write(., ...) fails outside a pipe, so the object is named explicitly
+join_ha_pd_gaul=read_sf(here("Input_data", "GAUL_tnuts_rest_ratio_ha_pd"))
+join_ha_pd_gaul=join_ha_pd_gaul %>% as_tibble() %>% write_csv(here("Input_data", "join_ha_pd_gaul.csv")) # strange transformation.
Creates weired rations in csv file + +# +# # Production +# # Join of harvested area and production +# join_pd_gaul = val_pd %>% +# group_by(ADM1_CODE, ADM0_CODE) %>% +# as_tibble() %>% +# # left_join(., val_ha, by="ADM1_CODE") %>% +# left_join(., shape_gaul, by = "ADM1_CODE") %>% +# rowwise() %>% +# mutate(tnuts_ratio_pd = sum_pd/sum(sum_pd, sum_rest_pd)) %>% #, +# # tnuts_ratio_ha = sum_ha/sum(sum_ha, sum_rest_ha)) %>% +# dplyr::select(G2008_1_ID, ADM0_CODE.x, ADM0_NAME, ADM0_NAME, ADM1_NAME, CONTINENT, REGION, +# sum_pd, sum_rest_pd, +# # sum_ha, sum_rest_ha, +# tnuts_ratio_pd, +# # tnuts_ratio_ha, +# geometry.y) %>% +# st_write(., here("Input_data", "GAUL_tnuts_rest_ratio_pd"), driver="ESRI Shapefile") # From here go to script +# join_pd_gaul=read_sf(here("Input_data", "GAUL_tnuts_rest_ratio_ha_pd")) +# +# # Harvested area +# join_ha_gaul = val_ha %>% +# group_by(ADM1_CODE, ADM0_CODE) %>% +# as_tibble() %>% +# # left_join(., val_ha, by="ADM1_CODE") %>% +# left_join(., shape_gaul, by = "ADM1_CODE") %>% +# rowwise() %>% +# mutate( +# # tnuts_ratio_pd = sum_pd/sum(sum_pd, sum_rest_pd), +# tnuts_ratio_ha = sum_ha/sum(sum_ha, sum_rest_ha)) %>% +# dplyr::select(G2008_1_ID, ADM0_CODE.x, ADM0_NAME, ADM0_NAME, ADM1_NAME, CONTINENT, REGION, +# # sum_pd, sum_rest_pd, +# sum_ha, sum_rest_ha, +# # tnuts_ratio_pd, +# tnuts_ratio_ha, geometry.y)# %>% +# st_write(., here("Input_data", "GAUL_tnuts_rest_ratio_ha"), driver="ESRI Shapefile") # From here go to script +# +# join_ha_gaul=read_sf(here("Input_data", "GAUL_tnuts_rest_ratio_ha_ha")) +# + + + +# Treenuts GAUL pixel --------------------------------------------------------------- +# Treenuts are loaded from the treenuts script +# Production +# # rbenchmark::benchmark( +# prod_dat_gaul_spam = SPAM_GAUL_JOIN %>% +# as_tibble() %>% +# left_join(.,sph.prod.tib, by="G2008_1_ID") %>% +# left_join(.,prd_dat, by="alloc_key") #%>% +# mutate(tnuts = trnt_r_*rest) %>% +# mutate(rest.1 = (1-trnt_r_)*rest) %>% +# dplyr::select(alloc_key, UID, GID_0.x, GID_1.x, rec_type, tech_type, unit, +# whea:rest, tnuts, rest.1) %>% +# write.csv(here("Input_data", "prod_tnuts_spam_gaul.csv")) + + +# c=read_csv(here("Input_data", "prod_tnuts_spam_gaul.csv")) +# +# # Approach validation > do the numbers make sense? 
+# xy=dat_join_prod %>%
+#   group_by(tnuts) %>%
+#   top_n(n=10) %>%
+#   dplyr::select(rest, tnuts, rest.1)
+#
+# write_csv(dat_join_prod, here("Input_data", "dat_treenut_prod.csv"))
+# # st_as_sf()
+
+# Production
+# rbenchmark::benchmark(
+# treenut_pd = as_tibble(join_ha_pd_gaul) %>%
+#   rename(G2008_1_ID=G2008_1) %>%
+#   left_join(.,as_tibble(SPAM_GAUL_JOIN), by="G2008_1_ID") %>%
+#   right_join(.,prd_dat, by="alloc_key")%>%
+#   dplyr::select(alloc_key, G2008_1_ID, ADM1_CODE, ADM1_NAME, ADM0_CODE, ADM0_NAME,
+#                 name_cntr, name_adm1.x, iso3.x, rec_type, tech_type, unit, whea:rest, tnuts_ratio_pd) %>%
+#   rename(
+#     # ADM1_NAME = ADM1_NAME.x,
+#     # ADM0_CODE = ADM0_CODE.x,
+#     # ADM0_NAME = ADM0_NAME.x,
+#     name_adm1 = name_adm1.x,
+#     iso3 =iso3.x) %>%
+#   write.csv(here("Input_data", "prod_tnuts_spam_gaul1.csv"))
+
+treenut_pd = as_tibble(join_ha_pd_gaul) %>%
+  # rename(G2008_1_ID = G2008_1) %>%
+  left_join(.,as_tibble(SPAM_GAUL_JOIN), by="G2008_1_ID") %>%
+  right_join(., prd_dat, by="alloc_key") %>%
+  dplyr::select(alloc_key, G2008_1_ID, ADM1_CODE, ADM1_NAME.x, ADM0_CODE.x, ADM0_NAME.x,
+                name_cntr, name_adm1.x, iso3.x, rec_type, tech_type, unit, whea:rest, tnuts_ratio_pd) %>%
+  rename(name_adm1 = name_adm1.x,
+         ADM1_NAME = ADM1_NAME.x,
+         ADM0_CODE = ADM0_CODE.x,
+         ADM0_NAME = ADM0_NAME.x,
+         iso3 = iso3.x) %>%
+  write_csv(here("Input_data", "prod_tnuts_spam_gaul1.csv"))
+treenut_pd = read.csv(here("Input_data","prod_tnuts_spam_gaul1.csv"))
+
+# Harvested area
+treenut_ha = as_tibble(join_ha_pd_gaul) %>%
+  # rename(G2008_1_ID = G2008_1) %>%
+  left_join(.,as_tibble(SPAM_GAUL_JOIN), by="G2008_1_ID") %>%
+  right_join(.,har_dat , by="alloc_key") %>%
+  dplyr::select(alloc_key, G2008_1_ID, ADM1_CODE, ADM1_NAME.x, ADM0_CODE.x, ADM0_NAME.x,
+                name_cntr, name_adm1.x, iso3.x, rec_type, tech_type, unit, whea:rest, tnuts_ratio_ha) %>%
+  rename(name_adm1 = name_adm1.x,
+         ADM1_NAME = ADM1_NAME.x,
+         ADM0_CODE = ADM0_CODE.x,
+         ADM0_NAME = ADM0_NAME.x,
+         iso3 = iso3.x) %>%
+  write_csv(here("Input_data", "ha_tnuts_spam_gaul1.csv")) # write_csv returns the data, so treenut_ha is not NULL (write.csv would return NULL)
+treenut_ha = read_csv(here("Input_data","ha_tnuts_spam_gaul1.csv"))
+
+# physical area
+treenut_phys = as_tibble(join_ha_pd_gaul) %>%
+  # rename(G2008_1_ID=G2008_1) %>%
+  left_join(.,as_tibble(SPAM_GAUL_JOIN), by="G2008_1_ID") %>%
+  right_join(.,phy_dat, by="alloc_key") %>%
+  dplyr::select(alloc_key, G2008_1_ID, ADM1_CODE, ADM1_NAME.x, ADM0_CODE.x, ADM0_NAME.x,
+                name_cntr, name_adm1.x, iso3.x, rec_type, tech_type, unit, whea:rest, tnuts_ratio_ha) %>%
+  rename(name_adm1 = name_adm1.x,
+         ADM0_CODE = ADM0_CODE.x,
+         ADM0_NAME = ADM0_NAME.x,
+         ADM1_NAME = ADM1_NAME.x,
+         iso3 = iso3.x) %>%
+  write_csv(here("Input_data", "phys_tnuts_spam_gaul1.csv"))
+treenut_phys = read.csv(here("Input_data","phys_tnuts_spam_gaul1.csv"))
+
+# Approach: Aim is to get final datasets on pixel level > Adm1 level
+# 1_Cbind the three datasets
+# 2_Gather crops
+# 3_Spread levels rec type
+# 4_Calculate yields after unit transformation
+
+# This data contains all crops spread > values correspond to the unaggregated SPAM dataset
+dat.phys = read_csv(here("Input_data", "phys_tnuts_spam_gaul1.csv")) # this is pixel level spam
+dat.prod = read_csv(here("Input_data", "prod_tnuts_spam_gaul1.csv")) # this is pixel level spam
+dat.harv = read_csv(here("Input_data", "ha_tnuts_spam_gaul1.csv")) # this is pixel level spam
+
+# calculating treenuts and rest from share ratio
+dat_prod = treenut_pd %>% mutate(tnuts = rest*tnuts_ratio_pd,
+                                 rest = rest*(1-tnuts_ratio_pd)) %>%
+  dplyr::select(-c(tnuts_ratio_pd)) %>%
+  write_csv(here("Input_data", "prod_tnuts_rest_spam_gaul_FINAL1.csv"))
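+
+# Design note: readr::write_csv() returns its input invisibly, so it can close a
+# pipe that also assigns; utils::write.csv() returns NULL and would leave the
+# object empty. A minimal illustration:
+# x <- mtcars %>% readr::write_csv(tempfile(fileext = ".csv")) # x still holds the data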
+
+dat_harv = dat.harv %>% mutate(tnuts = rest*tnuts_ratio_ha,
+                               rest = rest*(1-tnuts_ratio_ha)) %>%
+  dplyr::select(-c(tnuts_ratio_ha)) %>%
+  write_csv(here("Input_data", "harv_tnuts_rest_spam_gaul_FINAL.csv"))
+
+dat_phys = dat.phys %>% mutate(tnuts = rest*tnuts_ratio_ha,
+                               rest = rest*(1-tnuts_ratio_ha)) %>%
+  dplyr::select(-c(tnuts_ratio_ha)) %>%
+  write_csv(here("Input_data", "phys_tnuts_rest_spam_gaul_FINAL.csv"))
+
+# This data contains the final treenuts isolated from the initial rest. Rest now means the herbs, spices, etc.
+dat_phys = read_csv(here("Input_data", "phys_tnuts_rest_spam_gaul_FINAL.csv")) #tnuts ratios good here - checked
+dat_prod = read_csv(here("Input_data", "prod_tnuts_rest_spam_gaul_FINAL1.csv"))
+dat_harv = read_csv(here("Input_data", "harv_tnuts_rest_spam_gaul_FINAL.csv"))
+# head(dat.phys)
+# head(dat.prod)
+# head(dat.harv)
+
+# lst.tnuts = list(dat.harv, dat.phys, dat.prod)
+dat_nutsall <- do.call("rbind", list(dat_harv, dat_phys, dat_prod))
+
+# Calculating the yield for nuts
+# yield_nut_dat =
+do.call("rbind", list(dat_harv, dat_phys, dat_prod)) %>%
+  dplyr::select(-c(X1)) %>%
+  pivot_longer(names_to = "crops", values_to = "value", cols = whea:tnuts) %>%
+  pivot_wider(-unit, names_from = "rec_type",
+              values_from = "value") %>%
+  mutate(Y = (P*1000/H)) %>% #checked for result. We are transforming mt to kg when multiplying Production by 1000 to get unit kg/ha for yields
+  mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>%
+  # rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>%
+  write_csv(here("Input_data", "pxl_rec_spread_tec_gaul_spam_tnuts.csv")) # file name must match the read_csv below (it was written without the underscore)
+dat.all=read_csv(here("Input_data", "pxl_rec_spread_tec_gaul_spam_tnuts.csv"))
+
+# SUBSETTING FOR EU-27 GAUL ----------------------------------------------------------------------
+EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA",
+            "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR")
+# write.csv(EU_iso3, here("Input_data", "EU27_iso3_lst.csv"))
+length(EU_iso3) #27 member states > the list comes from gams
+
+# Check if the EU countries iso3 are all present in dat.phys etc.
+# EU_iso3 %in% dat.phys$iso3 #Validation if all EU27 countries are in the dfs below
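+
+# setdiff() is a sharper check than %in%: an empty result means every EU-27 code
+# is present (a sketch, same objects as above):
+# setdiff(EU_iso3, unique(dat.phys$iso3))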
+dat.phys.EU = read_csv(here("Input_data", "phys_tnuts_rest_spam_gaul_FINAL.csv")) %>% #Pixel level data
+  filter(iso3 %in% EU_iso3) %>%
+  mutate(alloc_key = str_sub(alloc_key, start=2)) %>%
+  dplyr::select(-c(X1_1, X1))
+
+dat.prod.EU = read_csv(here("Input_data", "prod_tnuts_rest_spam_gaul_FINAL1.csv")) %>%
+  filter(iso3 %in% EU_iso3) %>%
+  mutate(alloc_key = str_sub(alloc_key, start=2)) %>%
+  dplyr::select(-c(X1))
+
+dat.harv.EU = read_csv(here("Input_data", "harv_tnuts_rest_spam_gaul_FINAL.csv")) %>%
+  filter(iso3 %in% EU_iso3) %>%
+  mutate(alloc_key = str_sub(alloc_key, start=2)) %>%
+  dplyr::select(-c(X1_1, X1))
+
+dat.all.EU = do.call("rbind", list(dat.phys.EU, dat.prod.EU, dat.harv.EU)) %>%
+  # filter(iso3 %in% EU_iso3) %>%
+  # dplyr::select(-c(X1, X1_1)) %>%
+  pivot_longer(names_to = "crops", values_to = "value", cols = whea:tnuts) %>%
+  pivot_wider(-unit, names_from = "rec_type",
+              values_from = "value" ) %>%
+  mutate(Y = (P*1000/H)) %>% # P is in metric tonnes ('mt' = tonne, not megatonne); *1000 converts to kg, so Y is kg/ha. Cross-checked against the SPAM yield sheet (285 kg/ha there vs 300 kg/ha here).
+  mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% # pipe must stay unbroken here, otherwise the NaN step and the write are detached from the chain
+  # rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>%
+  write_csv(here("Input_data", "pxl_rec_tec_gaul_spam_tnuts_EU_27_corr_prod.csv")) #IMPORTANT dataset. Pixel level. Tnuts.
+dat.all.EU=read_csv(here("Input_data","pxl_rec_tec_gaul_spam_tnuts_EU_27_corr_prod.csv"))
+
+# Testing the approach
+# {
+#   # cbind dfs - testing subset
+#   fil.prod = dat.prod %>% top_n(n=20)
+#   fil.harv = dat.harv %>% top_n(n=20)
+#   fil.phys = dat.phys %>% top_n(n=20)
+#
+#   lst.tnuts = list(fil.harv, fil.phys, fil.prod)
+#   x= do.call("rbind", list(fil.harv, fil.phys, fil.prod))
+#
+#   long.dat.nuts <- do.call("rbind", list(fil.harv, fil.phys, fil.prod)) %>%
+#     dplyr::select(-c(X1)) %>%
+#     pivot_longer(names_to = "crops", values_to = "value", cols = whea:rest) %>%
+#     pivot_wider(-unit, names_from = "rec_type",
+#                 values_from = "value" ) #%>%
+#     mutate(Y = (P*1000/H))%>% # see unit note above (mt = metric tonne)
+# mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% +# rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% +# write.csv(here("Input_data", "test_rbind_fil")) +# } + +# Crop name mapping ------------------------------------------------------- +# SPAM mapping +nam_cat <- read_csv(here("Input_data", 'crop_names.csv')) # Loading crop name sheets - mapping SPAM -- FAO (FROM SPAM) +nam_cat <- nam_cat %>% mutate(FAOCODE = as.double(FAOCODE)) %>% + rename(No_spam = names(nam_cat)[1]) + +GAEZ_SPAM_crp = read_csv(here("Input_data", "6-GAEZ-and-SPAM-crops-correspondence-2015-02-26.csv")) #Mapping GAEZ and SPAM (from SPAM) +GAEZ_SPAM_crp = GAEZ_SPAM_crp %>% + rename(No_spam = names(.)[4]) %>% + dplyr::select('SPAM crop long name', 'GAEZ crop', No_spam) + +# ORiginal Croplist Modeldat CIFOS +FAO_CIFOS_crp = read_csv(here("Input_data", "FAO_modeldat_2020.csv")) + +# FAO spreaddsheets +FAO_stat_control = read_csv(here("Input_data","FAOSTAT_data_12-26-2020-1.csv")) +food_cat <- read_csv(here("Input_data",'foodnonfood.csv'))# laoding food usage sheet +food_cat = food_cat %>% + rename(`SPAM short name`=Crop_short) +FAOcntr <- read_csv(here("Input_data",'FAO_countries.csv')) # loading FAO country codes +FAOcntr <- dplyr::select(FAOcntr, 'ISO3', 'FAOSTAT_CODE', "GAUL_CODE", 'Continent', 'Subcontinent') + + +# The incomplete mapping of SPAM, GAEZ, FAOstat, FAO_Cifos are made +# Not all the crops are matched perfectly. This is made by hand in excel (export-import a chunk below) +model_dat = left_join(FAO_CIFOS_crp, FAO_stat_control, by="crop") %>% + left_join(nam_cat, ., by="FAOCODE") %>% + left_join(.,GAEZ_SPAM_crp, by= "No_spam") #%>% +write.csv(here("Input_data", "model_crops_incomplete.csv")) + +# This data frame is a complete mapping of GAEZ, SPAM and FAO (FAO original and CIFOS model dat FAO crop names) +# The FAO crop names from the model data differ slightly between Model dat and the fAO crops that I exctracted from FAOSTAT. +# Crop others were adjusted according to SPAM indication and the GAEZ mapped crops (cereal others = oats, etc.) + +dat_crp_map <- read_csv(here("Input_data","model_crops_mapping_GAEZ_SPAM_FAO_complete.csv")) +# dat_crp_map2 <- read_csv(here("Input_data", "model_crops_incomplete.csv")) + +#Validation > Result = Complete match! 
+# dat_crp_ma p2$ crop_cifos %in% FAO_CIFOS_crp$crop + +dat_crp_map1 = dat_crp_map %>% + rename(crop_cifos = crop) %>% + left_join(.,food_cat) #adding food categories (food/non-food) + +dat_crp_map1$Usage[42]="food" #adding the food category to treenuts +write.csv(dat_crp_map1, here("Input_data", "crop_name_mapping_complete.csv")) # this file can be used for further mapping of the crops + +# Aggregating to ADM1 level EU ----------------------------------------------- +# Load and transform data +dat.pxl.EU = read_csv(here("Input_data", "pxl_rec_tec_gaul_spam_tnuts_EU_27_corr_prod.csv")) +dat_TA_yld = read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv")) %>% + dplyr::select(alloc_key,name_cntr,name_adm1, iso3, prod_level, x, y) %>% + mutate(alloc_key = str_sub(alloc_key, 2)) + + +# Mapping countries +FAOcntr <- read_csv(here("Input_data", 'FAO_countries.csv')) +FAOcntr <- dplyr::select(FAOcntr, 'ISO3', 'FAOSTAT_CODE', "GAUL_CODE", 'Continent', 'Subcontinent') %>% + rename(iso3=ISO3) +dat_EU27 = read_csv(here("Input_data", "Country_name_mapping_incomplete.csv"))%>% + left_join(.,FAOcntr, by="iso3") + +dat_crop_nam = read_csv(here("Input_data", "crop_name_mapping_complete.csv")) %>% + dplyr::select(-c(X1,X1_1)) %>% + rename(crops = 'SPAM short name') + +# Transform pixel data so that it has SPAM variables (adm1 level) + +dat.pixel.EU = dat.pxl.EU %>% + mutate_at(vars(alloc_key), funs(as.character)) %>% + left_join(.,dat_TA_yld, by="alloc_key") %>% + dplyr::select(alloc_key, iso3.x, name_cntr.x,name_adm1.x, prod_level, iso3.x, G2008_1_ID, + ADM0_CODE, ADM0_NAME, ADM1_CODE, ADM1_NAME, x, y, crops, tech_type) %>% + rename(iso3=iso3.x, + name_cntr=name_cntr.x, + name_adm1=name_adm1.x) %>% + left_join(.,dat_crop_nam, by="crops") %>% + left_join(.,dat_EU27, by="iso3") %>% + dplyr::select(prod_level, G2008_1_ID, tech_type, CIFOS_cntr_nam, crop_cifos, crops) + +# df with prod_level and Cifos country EU +EU_iso3 = c("AUT","BEL", "BGR", "HRV", "CZE", "DNK", "EST","FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA","LVA", + "LTU", "LUX", "MLT", "NLD", "POL", "PRT","ROU", "SVK","SVN", "ESP", "SWE", "GBR") + +dat_cntr_EU = dat.pxl.EU %>% + mutate_at(vars(alloc_key), funs(as.character)) %>% + left_join(.,dat_TA_yld, by="alloc_key") %>% + dplyr::select(prod_level, iso3.x, name_adm1.x) %>% + dplyr::distinct_at(vars('prod_level', 'iso3.x', "name_adm1.x"), .keep_all = F) %>% + rename(iso3=iso3.x, + name_adm1=name_adm1.x) %>% + filter(iso3 %in% EU_iso3) %>% + left_join(.,dat_EU27, by="iso3") %>% + dplyr::select(prod_level, iso3, name_adm1, CIFOS_cntr_nam, Continent, Subcontinent)# %>% +# rename( +# iso3=iso3.x, +# name_adm1=name_adm1.x) + +# df with crops and cifos crops +dat_crop_nam1 = read_csv(here("Input_data", "crop_name_mapping_complete.csv")) %>% + dplyr::select('SPAM short name', FAONAMES, crop_cifos) %>% + rename(crops = 'SPAM short name') + +# Aggregating H,A,P and calculating Yields per Adm1 level +dat.adm1.EU = dat.pxl.EU %>% + mutate_at(vars(alloc_key), funs(as.character)) %>% + # left_join(., dat.pxl.EU, by="ADM1_NAME") %>% + # mutate_at(vars(alloc_key), funs(as.character)) %>% + left_join(.,dat_TA_yld, by="alloc_key") %>% + dplyr::select(iso3.x, name_cntr.x, name_adm1.x, ADM0_NAME, ADM1_NAME, prod_level, tech_type, crops, A, H, P) %>% + group_by(prod_level, tech_type, crops) %>% + summarize(across(.cols = A:P, .fns = sum))%>% #all three rec types are summed per adm1(prod_level) and crop + mutate(Y = (P*1000/H))%>% # Checked with initial spam Yield sheet (kg/ha) (original yield = 285kg/ha, here 
= 300 kg/ha). P is in mt (metric tonnes): multiplying by 1000 converts to kg, so Y = P*1000/H is in kg/ha. Note: the 'Mt' (megatonne = 10^6 t = 10^9 kg) defined in the EU statistics glossary is a different unit from SPAM's 'mt' (metric tonne), which explains the apparent mismatch.
  mutate_at(vars(Y), ~replace(., is.nan(.), 0)) #%>%

# This is the sheet that contains the parameters that are important for the GAMS input file.
# It contains yield, harvested and physical area per country, adm1 unit and crop.
EU27_crp_adm1 = dat.adm1.EU %>%
  left_join(., dat_cntr_EU, by = "prod_level") %>%
  filter(iso3 %in% EU_iso3) %>%
  left_join(., dat_crop_nam1, by = "crops") %>%
  dplyr::select(iso3, FAONAMES, Subcontinent, Continent, CIFOS_cntr_nam, name_adm1, crop_cifos, A, H, P, Y) %>%
  write_csv(here("Input_data", "EU27_crp_adm1_gaul.csv")) # Final crop yield/area file.

# Final dataset!
EU27_crp_adm1 <- read_csv(here("Input_data", "EU27_crp_adm1_gaul.csv"))
View(EU27_crp_adm1)

length(unique(EU27_crp_adm1$iso3)) # correct: 27 countries
anyNA(EU27_crp_adm1) # check for missing values

#
#
#
# # OLD ---------------------------------------------------------------------
# # Loading the files and calculate final ratio -----------------------------
# wd2=setwd(here("Input_data"))
#
# sph.prod1 = read_sf(dsn = wd2, layer = "GADM_treenut_rest_pd")
# sph.harv1 = read_sf(dsn = wd2, layer = "GADM_treenut_rest_ha")
# sph.all = st_join(sph.prod, sph.harv, join = st_within)
# write_sf(sph.all, dsn = here("Input_data"), driver = "ESRI Shapefile") #shapefile is called 'Input_data'
#
# join_harv = st_join(SPAM_GADM_JOIN_GIS_left, sph.harv, join = st_intersects)
# join_harv_prod_SPAM_GADM = st_join(join_harv, sph.prod, join = st_intersects)
#
# # Joining GADM with SPAM -------------------------------------------------------
# wd=setwd(here("Input_data"))
#
# # Load and check shapefile
# shape=read_sf(dsn = wd, layer = "gadm36") # multipolygons containing all admin levels globally
# shape=dplyr::select(shape, 'UID', 'geometry', 'GID_0', 'GID_1')
# # shape_fil=filter(shape, GID_0 == 'FIN')
#
# # Making SPAM spatially explicit
# dat_spam=read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv"))
# dat_spam=dplyr::select(dat_spam, 'x','y', 'alloc_key')
#
# # Make SPAM spatial
# projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
# dat_cent <- st_as_sf(x = dat_spam,
#                      coords = c("x", "y"),
#                      crs = projcrs)
#
# class(dat_cent)
# # dat_cent = filter(df, iso3=="FIN")
#
# SPAM_GADM_JOIN_GIS_left = st_join(dat_cent, shape, join = st_intersects, left = T) #https://postgis.net/docs/ST_Within.html
# # head(SPAM_GADM_JOIN_GIS)
# # class(SPAM_GADM_Join)
#
# st_write(obj = SPAM_GADM_JOIN_GIS, dsn = here("Input_data"), driver = "ESRI Shapefile")
# st_write(obj = SPAM_GADM_JOIN_GIS_left, dsn = here("Input_data"), driver = "ESRI Shapefile")
#
# # Joining SPAM and GAUL
#
diff --git a/Treenuts/TreenutsGAUL2.R b/Treenuts/TreenutsGAUL2.R
new file mode 100644
index 0000000000000000000000000000000000000000..3d7d89126a200320e083026e4489db4b1454d96d
--- /dev/null
+++ b/Treenuts/TreenutsGAUL2.R
@@ -0,0 +1,789 @@
easypackages::packages(c("tidyverse", 'sf', 'raster', 'sp', 'dplyr', 'spatialEco',
                         'exactextractr', 'tibble', 'here', "readr", "magrittr", "tidyr",
                         "readxl", "sfheaders"))

# New approach -----------------------------------------------------------
# Idea:
# 1. Load all the harvested area and yield rasters (GeoTIFF)
# 2. Extract their values to the SPAM cell points
# 3.
# 4.
Then look at the proportion between Treenuts and the rest of spam crop others for each pixel + +# Zonal statistics ------------------------------------------------------- +wd=setwd(here("Input_data")) + +# Load and check shapefile +shape_gadm=read_sf(dsn = wd, layer = "gadm36") # multipolygons containing all admin levels globally +shape_gadm=dplyr::select(shape_gadm, 'UID', 'geometry', 'GID_0', 'GID_1') + +shape_gaul=read_sf(here("Input_data", "g2008_1.shp")) #actual GAUL map +shape_gaul=dplyr::select(shape_gaul, 'G2008_1_ID', 'ADM1_CODE', "ADM0_CODE", 'ADM0_NAME', 'ADM1_NAME', "CONTINENT", "REGION", "geometry") +# class(shape_gadm) +# shape_fil=filter(shape, GID_0 == 'FIN') +dat_spam=read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv")) +dat_spam=dplyr::select(dat_spam, x,y, alloc_key, name_adm1, iso3) + +# Make SPAM spatial +# projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0" +# proj_mercator = "+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs" +proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below +proj_meter = "+datum=WGS84 +units=m +no_defs" +dat_cent <- st_as_sf(x = dat_spam, + coords = c("x", "y"), + crs = proj) + + +# Load treenuts and rest files +{ + # ylds - Nuts + rast_walnut_yld = raster(here("Input_data","walnut_YieldPerHectare.tif")) + { + + # class(rast_walnut_yld) + + # rast_walnut_withoutNA = rast_walnut_yld + # # + # mean(rast_walnut_yld$walnut_YieldPerHectare) + # mean(rast_walnut_withoutNA$walnut_YieldPerHectare) + # rast_walnut_withoutNA[rast_walnut_withoutNA$walnut_YieldPerHectare == 0] <- NA + # check=as.raster(rast_walnut_withoutNA) + # + # cellStats(rast_walnut_withoutNA$walnut_YieldPerHectare, mean) + # + # cellStats(rast_walnut_withoutNA, mean) + # cellStats(rast_walnut_yld, mean) + # rast_walnut_withoutNA$walnut_YieldPerHectare + } + rast_almond_yld = raster(here("Input_data","almond_YieldPerHectare.tif")) + rast_chestnut_yld = raster(here("Input_data","chestnut_YieldPerHectare.tif")) + rast_cashew_yld = raster(here("Input_data","cashew_YieldPerHectare.tif")) + rast_pistachio_yld = raster(here("Input_data","pistachio_YieldPerHectare.tif")) + rast_kolanut_yld = raster(here("Input_data", "kolanut_YieldPerHectare.tif")) + rast_hazelnut_yld = raster(here("Input_data","hazelnut_YieldPerHectare.tif")) + rast_brazil_yld = raster(here("Input_data","brazil_YieldPerHectare.tif")) + rast_areca_yld = raster(here("Input_data","areca_YieldPerHectare.tif")) + rast_nutnes_yld = raster(here("Input_data","nutnes_YieldPerHectare.tif")) + { + # 0's to Na's + # rast_almond_yldNA=rast_almond_yld[rast_almond_yld$almond_YieldPerHectare == 0] <- NA + + # rast_almond_yld[rast_almond_yld$almond_YieldPerHectare == 0] <- NA + + # cellStats(rast_walnut_withoutNA$walnut_YieldPerHectare, min) + # cellStats(rast_walnut_yld$walnut_YieldPerHectare, min) + + # rast_almond_yld + } + # yld - Rest + rast_anise_yld = raster(here("Input_data","aniseetc_YieldPerHectare.tif")) + rast_chili_yld = raster(here("Input_data","chilleetc_YieldPerHectare.tif")) + rast_cinnamon_yld = raster(here("Input_data","cinnamon_YieldPerHectare.tif")) + rast_ginger_yld = raster(here("Input_data",'ginger_YieldPerHectare.tif')) + rast_hop_yld = raster(here("Input_data",'hop_YieldPerHectare.tif')) + rast_mate_yld = raster(here("Input_data",'mate_YieldPerHectare.tif')) + rast_nutmeg_yld = raster(here("Input_data","nutmeg_YieldPerHectare.tif")) + rast_pepper_yld = raster(here("Input_data",'pepper_YieldPerHectare.tif')) + 
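# The long run of raster() calls in this block could also be built
# programmatically. A minimal sketch, assuming every layer follows the
# <crop>_YieldPerHectare.tif naming pattern used above (tnut_crops below is an
# illustrative subset, not the full crop list):
tnut_crops <- c("walnut", "almond", "chestnut", "cashew", "pistachio")
yld_stack <- raster::stack(file.path(here("Input_data"),
                                     paste0(tnut_crops, "_YieldPerHectare.tif")))
# names(yld_stack) then matches the file names, e.g. walnut_YieldPerHectare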
rast_peppermint_yld = raster(here("Input_data",'peppermint_YieldPerHectare.tif')) + rast_pyreth_yld = raster(here("Input_data",'pyrethrum_YieldPerHectare.tif')) + rast_ramie_yld = raster(here("Input_data",'ramie_YieldPerHectare.tif')) + rast_ruber_yld = raster(here("Input_data",'rubber_YieldPerHectare.tif')) + rast_spices_yld = raster(here("Input_data",'spicenes_YieldPerHectare.tif')) + rast_sugarnes_yld = raster(here("Input_data",'sugarnes_YieldPerHectare.tif')) + rast_vanilla_yld = raster(here("Input_data",'vanilla_YieldPerHectare.tif')) + + # Production - treenuts + + rast_walnut_pd = raster(here("Input_data","walnut_Production.tif")) + rast_almond_pd = raster(here("Input_data","almond_Production.tif")) + rast_chestnut_pd = raster(here("Input_data","chestnut_Production.tif")) + rast_cashew_pd = raster(here("Input_data","cashew_Production.tif")) + rast_pistachio_pd = raster(here("Input_data","pistachio_Production.tif")) + rast_kolanut_pd = raster(here("Input_data", "kolanut_Production.tif")) + rast_hazelnut_pd = raster(here("Input_data","hazelnut_Production.tif")) + rast_brazil_pd = raster(here("Input_data","brazil_Production.tif")) + rast_areca_pd = raster(here("Input_data","areca_Production.tif")) + rast_nutnes_pd = raster(here("Input_data","nutnes_Production.tif")) + # Production -rest + rast_anise_pd = raster(here("Input_data","aniseetc_Production.tif")) + rast_chili_pd = raster(here("Input_data","chilleetc_Production.tif")) + rast_cinnamon_pd = raster(here("Input_data","cinnamon_Production.tif")) + rast_ginger_pd = raster(here("Input_data",'ginger_Production.tif')) + rast_hop_pd = raster(here("Input_data",'hop_Production.tif')) + rast_mate_pd = raster(here("Input_data",'mate_Production.tif')) + rast_nutmeg_pd = raster(here("Input_data","nutmeg_Production.tif")) + rast_pepper_pd = raster(here("Input_data",'pepper_Production.tif')) + rast_peppermint_pd = raster(here("Input_data",'peppermint_Production.tif')) + rast_pyreth_pd = raster(here("Input_data",'pyrethrum_Production.tif')) + rast_ramie_pd = raster(here("Input_data",'ramie_Production.tif')) + rast_ruber_pd = raster(here("Input_data",'rubber_Production.tif')) + rast_spices_pd = raster(here("Input_data",'spicenes_Production.tif')) + rast_sugarnes_pd = raster(here("Input_data",'sugarnes_Production.tif')) + rast_vanilla_pd = raster(here("Input_data",'vanilla_Production.tif')) +x=rasterToPolygons(rast_vanilla_pd, digits=12) +x1=st_as_sf(x) + + # harvested area - treentus + rast_walnut_ha = raster(here("Input_data","walnut_HarvestedAreaHectares.tif")) + rast_almond_ha = raster(here("Input_data","almond_HarvestedAreaHectares.tif")) + rast_chestnut_ha = raster(here("Input_data","chestnut_HarvestedAreaHectares.tif")) + rast_cashew_ha = raster(here("Input_data","cashew_HarvestedAreaHectares.tif")) + rast_pistachio_ha = raster(here("Input_data","pistachio_HarvestedAreaHectares.tif")) + rast_kolanut_ha = raster(here("Input_data", "kolanut_HarvestedAreaHectares.tif")) + rast_hazelnut_ha = raster(here("Input_data","hazelnut_HarvestedAreaHectares.tif")) + rast_brazil_ha = raster(here("Input_data","brazil_HarvestedAreaHectares.tif")) + rast_areca_ha = raster(here("Input_data","areca_HarvestedAreaHectares.tif")) + rast_nutnes_ha = raster(here("Input_data","nutnes_HarvestedAreaHectares.tif")) + # HarvestedAreaHectares -rest + rast_anise_ha = raster(here("Input_data","aniseetc_HarvestedAreaHectares.tif")) + rast_chili_ha = raster(here("Input_data","chilleetc_HarvestedAreaHectares.tif")) + rast_cinnamon_ha = 
raster(here("Input_data","cinnamon_HarvestedAreaHectares.tif")) + rast_ginger_ha = raster(here("Input_data",'ginger_HarvestedAreaHectares.tif')) + rast_hop_ha = raster(here("Input_data",'hop_HarvestedAreaHectares.tif')) + rast_mate_ha = raster(here("Input_data",'mate_HarvestedAreaHectares.tif')) + rast_nutmeg_ha = raster(here("Input_data","nutmeg_HarvestedAreaHectares.tif")) + rast_pepper_ha = raster(here("Input_data",'pepper_HarvestedAreaHectares.tif')) + rast_peppermint_ha = raster(here("Input_data",'peppermint_HarvestedAreaHectares.tif')) + rast_pyreth_ha = raster(here("Input_data",'pyrethrum_HarvestedAreaHectares.tif')) + rast_ramie_ha = raster(here("Input_data",'ramie_HarvestedAreaHectares.tif')) + rast_ruber_ha = raster(here("Input_data",'rubber_HarvestedAreaHectares.tif')) + rast_spices_ha = raster(here("Input_data",'spicenes_HarvestedAreaHectares.tif')) + rast_sugarnes_ha = raster(here("Input_data",'sugarnes_HarvestedAreaHectares.tif')) + rast_vanilla_ha = raster(here("Input_data",'vanilla_HarvestedAreaHectares.tif')) +} + +# yld_list=list(rast_almond_yld, rast_cashew_yld, rast_chestnut_yld, rast_hazelnut_yld, rast_kolanut_yld, rast_pistachio_yld, rast_walnut_yld) +# stack_yld=stack(rast_almond_yld, rast_cashew_yld, rast_chestnut_yld, rast_hazelnut_yld, rast_kolanut_yld, rast_pistachio_yld, rast_walnut_yld) +# stack_ha=stack(rast_almond_ha, rast_cashew_ha, rast_chestnut_ha, rast_hazelnut_ha, rast_kolanut_ha, rast_pistachio_ha, rast_walnut_ha) +# +# dat$mean.nfertilizer_global +# summary(dat$mean.nfertilizer_global) +# + +# Perform zonal statistics ------------------------------------------------ +# FunZone_yld=function(rast,shape, na.rm=T){ # raster and vector +# c=zonal.stats(shape, rast, stats = c("Fun_nzmean")) +# c$UID <- shape$UID # assigning the identifier of the original df to the extracted df +# merge(shape, c, by.x = 'UID', by.y = 'UID') # merge the two files together +# } +# GADM +FunZone_sum=function(rast){ # raster and vector + c=zonal.stats(shape, rast, stats = c("sum")) + c$UID <- shape$UID # assigning the identifier of the original df to the extracted df + dat <- merge(shape, c, by.x = 'UID', by.y = 'UID') # merge the two files together +} + +# GAUL +FunZone_sum=function(rast){ # raster and vector + c=zonal.stats(shape_gaul, rast, stats = c("sum")) + c$G2008_1_ID <- shape_gaul$G2008_1_ID # assigning the identifier of the original df to the extracted df + dat <- merge(shape_gaul, c, by.x = 'G2008_1_ID', by.y = 'G2008_1_ID') # merge the two files together +} + +# +# original ---------------------------------------------------------------- +FunZone=function(rast, shape, na.rm=T){ # raster and vector + c=zonal.stats(shape, rast, stats = c("mean")) + c$UID <- shape$UID # assigning the identifier of the original df to the extracted df + dat <- merge(shape, c, by.x = 'UID', by.y = 'UID') # merge the two files togethere +} +?read_sf +# # test ignoring 0s -------------------------------------------------------- +# nzmean <- function(x) { +# zvals <- x==0 +# if (all(zvals)) 0 else mean(x[!zvals]) +# } +# +# Fun_nzmean=function(r, na.rm=F){ +# mean(r[r!=0]) +# } +# +# FunZone_yld_0=function(rast,shape){ # raster and vector +# c=zonal.stats(shape, rast, stats = c("nzmean")) +# c$UID <- shape$UID # assigning the identifier of the original df to the extracted df +# merge(shape, c, by.x = 'UID', by.y = 'UID') # merge the two files together +# } +# +# # Test raster +# v2=raster::extract(rast_walnut_yld,shape_fil,fun='Fun_nzmean') + + +# Yields 
------------------------------------------------------------------
# Zonal mean helper used below (the earlier FunZone_yld draft was only
# commented out, so the calls in this section had no definition to hit):
FunZone_yld = function(rast){
  c = zonal.stats(shape_gadm, rast, stats = c("mean"))
  c$UID <- shape_gadm$UID # assigning the identifier of the original df to the extracted df
  merge(shape_gadm, c, by.x = 'UID', by.y = 'UID') # merge the two files together
}

# Apply zonal stats to treenuts
# Test runs against a Finland subset (shape_fil) and a zero-masked walnut raster
# (rast_walnut_withoutNA); both objects only exist in the commented-out code above.
# ex_walnut_yld = FunZone_yld(rast_walnut_yld, shape_fil)
# ex_walnut_yld_0 = FunZone_yld(rast_walnut_yld, shape_fil)
# mean(ex_walnut_yld$mean.walnut_YieldPerHectare)
# mean(ex_walnut_yld_0$mean.walnut_YieldPerHectare)
# mean(ex_walnut_withNA_FIn$mean.walnut_YieldPerHectare)
# ex_walnut_withNA_FIn = FunZone_yld(rast_walnut_withoutNA, shape_fil)
# ex_walnut_yld$nzmean.walnut_YieldPerHectare
# max(ex_walnut_withNA_FIn$mean.walnut_YieldPerHectare)

ex_walnut_yld = FunZone_yld(rast_walnut_yld)
ex_almond_yld = FunZone_yld(rast_almond_yld)
ex_chestnut_yld = FunZone_yld(rast_chestnut_yld)
ex_cashew_yld = FunZone_yld(rast_cashew_yld)
ex_pistachio_yld = FunZone_yld(rast_pistachio_yld)
ex_kolanut_yld = FunZone_yld(rast_kolanut_yld)
ex_hazelnut_yld = FunZone_yld(rast_hazelnut_yld)
ex_brazil_yld = FunZone_yld(rast_brazil_yld)
ex_areca_yld = FunZone_yld(rast_areca_yld)
ex_nutnes_yld = FunZone_yld(rast_nutnes_yld)

# Apply zonal stats to rest
ex_anise_yld = FunZone_yld(rast_anise_yld)
ex_chili_yld = FunZone_yld(rast_chili_yld)
ex_cinnamon_yld = FunZone_yld(rast_cinnamon_yld)
ex_ginger_yld = FunZone_yld(rast_ginger_yld)
ex_hop_yld = FunZone_yld(rast_hop_yld)
ex_mate_yld = FunZone_yld(rast_mate_yld)
ex_nutmeg_yld = FunZone_yld(rast_nutmeg_yld)
ex_pepper_yld = FunZone_yld(rast_pepper_yld)
ex_peppermint_yld = FunZone_yld(rast_peppermint_yld)
ex_pyreth_yld = FunZone_yld(rast_pyreth_yld)
ex_ramie_yld = FunZone_yld(rast_ramie_yld)
ex_ruber_yld = FunZone_yld(rast_ruber_yld)
ex_spices_yld = FunZone_yld(rast_spices_yld)
ex_sugarnes_yld = FunZone_yld(rast_sugarnes_yld)
ex_vanilla_yld = FunZone_yld(rast_vanilla_yld)

# Join columns (cbind)
lst.ex.yld = list(ex_walnut_yld, ex_almond_yld, ex_chestnut_yld, ex_cashew_yld, ex_pistachio_yld, ex_kolanut_yld, ex_hazelnut_yld, ex_areca_yld, ex_brazil_yld, ex_nutnes_yld,
                  ex_anise_yld, ex_chili_yld, ex_cinnamon_yld, ex_ginger_yld, ex_hop_yld, ex_mate_yld, ex_nutmeg_yld, ex_pepper_yld, ex_peppermint_yld, ex_pyreth_yld, ex_ramie_yld, ex_ruber_yld, ex_spices_yld, ex_sugarnes_yld, ex_vanilla_yld)
dat_ex.yld = do.call("cbind", lst.ex.yld) # one wide sf holding the zonal mean of every crop
# head(dat_ex.yld)

# Row-wise mean yield per admin unit. Base R has no rowmean(); mean(c_across())
# inside rowwise() does the job, and the two means must go into two different
# variables (the original second mutate silently overwrote treenut_yld).
# The treenut mean uses the same seven nuts (walnut..hazelnut) as the
# production/harvested-area sums further down.
treenut_yld = dat_ex.yld %>%
  select(UID, GID_0, GID_1, starts_with("mean."), geometry) %>%
  rowwise() %>%
  mutate(treenut_yld = mean(c_across(mean.walnut_YieldPerHectare:mean.hazelnut_YieldPerHectare)),
         rest_yld = mean(c_across(mean.aniseetc_YieldPerHectare:mean.vanilla_YieldPerHectare)))

# class(treenut_yld)
# write.csv(treenut_yld, 'D:/Raw_dat/Raw_dat/Ramakutty/HarvestedAreaYield175Crops_Geotiff/HarvestedAreaYield175Crops_Geotiff/Treenuts', row.names=T)
st_write(treenut_yld, here("Input_data", "GADM_treenut.shp"), driver = "ESRI Shapefile") # write to a shapefile
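# A minimal sketch of the nzmean idea from the commented experiments above,
# assuming zeros in these yield rasters mean "no harvested area" rather than a
# true zero yield: mask zeros to NA and compare global means (cellStats skips
# NA cells by default). rast_walnut_nz is an illustrative name, not from the
# original script.
rast_walnut_nz <- rast_walnut_yld
rast_walnut_nz[rast_walnut_nz == 0] <- NA   # treat 0 as missing, not as data
cellStats(rast_walnut_yld, mean)            # mean including zero cells
cellStats(rast_walnut_nz, mean)             # mean over harvested cells only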
# Production -------------------------------------------------------------
# Production is an extensive variable, so zonal SUMS over the GAUL polygons
# (FunZone_sum) are used here, not means.
ex_walnut_pd = FunZone_sum(rast_walnut_pd)
ex_almond_pd = FunZone_sum(rast_almond_pd)
ex_chestnut_pd = FunZone_sum(rast_chestnut_pd)
ex_cashew_pd = FunZone_sum(rast_cashew_pd)
ex_pistachio_pd = FunZone_sum(rast_pistachio_pd)
ex_kolanut_pd = FunZone_sum(rast_kolanut_pd)
ex_hazelnut_pd = FunZone_sum(rast_hazelnut_pd)
ex_brazil_pd = FunZone_sum(rast_brazil_pd)
ex_areca_pd = FunZone_sum(rast_areca_pd)
ex_nutnes_pd = FunZone_sum(rast_nutnes_pd)

# Rest
ex_anise_pd = FunZone_sum(rast_anise_pd)
ex_chili_pd = FunZone_sum(rast_chili_pd)
ex_cinnamon_pd = FunZone_sum(rast_cinnamon_pd)
ex_ginger_pd = FunZone_sum(rast_ginger_pd)
ex_hop_pd = FunZone_sum(rast_hop_pd)
ex_mate_pd = FunZone_sum(rast_mate_pd)
ex_nutmeg_pd = FunZone_sum(rast_nutmeg_pd)
ex_pepper_pd = FunZone_sum(rast_pepper_pd)
ex_peppermint_pd = FunZone_sum(rast_peppermint_pd)
ex_pyreth_pd = FunZone_sum(rast_pyreth_pd)
ex_ramie_pd = FunZone_sum(rast_ramie_pd)
ex_ruber_pd = FunZone_sum(rast_ruber_pd)
ex_spices_pd = FunZone_sum(rast_spices_pd)
ex_sugarnes_pd = FunZone_sum(rast_sugarnes_pd)
ex_vanilla_pd = FunZone_sum(rast_vanilla_pd)

# Combine treenuts and rest into one wide sf
# (note: ex_brazil_pd is computed above but was not carried into this list)
lst.ex.pd = list(ex_walnut_pd, ex_almond_pd, ex_chestnut_pd, ex_cashew_pd, ex_pistachio_pd, ex_kolanut_pd, ex_hazelnut_pd, ex_areca_pd, ex_nutnes_pd,
                 ex_anise_pd, ex_chili_pd, ex_cinnamon_pd, ex_ginger_pd, ex_hop_pd, ex_mate_pd, ex_nutmeg_pd, ex_pepper_pd, ex_peppermint_pd, ex_pyreth_pd, ex_ramie_pd, ex_ruber_pd, ex_spices_pd, ex_sugarnes_pd, ex_vanilla_pd)
dat_ex.pd = do.call("cbind", lst.ex.pd)

treenut_pd = dat_ex.pd %>%
  dplyr::select(G2008_1_ID, ADM1_CODE, ADM0_CODE, ADM1_NAME, ADM0_NAME, CONTINENT, REGION,
                starts_with("sum."), geometry) %>%
  group_by(ADM1_CODE, ADM0_CODE) %>%
  mutate(treenut_pd = sum(sum.walnut_Production,
                          sum.almond_Production,
                          sum.chestnut_Production,
                          sum.cashew_Production,
                          sum.pistachio_Production,
                          sum.kolanut_Production,
                          sum.hazelnut_Production)) %>%
  mutate(rest_pd = sum(sum.aniseetc_Production,
                       sum.chilleetc_Production,
                       sum.cinnamon_Production,
                       sum.ginger_Production,
                       sum.hop_Production,
                       sum.mate_Production,
                       sum.nutmeg_Production,
                       sum.pepper_Production,
                       sum.peppermint_Production,
                       sum.pyrethrum_Production,
                       sum.ramie_Production,
                       sum.rubber_Production,
                       sum.spicenes_Production,
                       sum.sugarnes_Production,
                       sum.vanilla_Production)) %>%
  dplyr::select(G2008_1_ID, ADM1_CODE, ADM0_CODE, ADM1_NAME, ADM0_NAME, CONTINENT, REGION, treenut_pd, rest_pd)

# class(treenut_yld)
st_write(treenut_pd, here("Input_data", "GAUL_treenut_rest_pd.shp"), driver = "ESRI Shapefile") # write to a shapefile
treenut_pd = read_sf(here("Input_data", "GAUL_treenut_rest_pd.shp")) # re-load the shapefile just written (path fixed: ",shp" was a typo)

# Harvested area - Treenuts -----------------------------------------------

ex_walnut_ha = FunZone_sum(rast_walnut_ha)
ex_almond_ha = FunZone_sum(rast_almond_ha)
ex_chestnut_ha = FunZone_sum(rast_chestnut_ha)
ex_cashew_ha = FunZone_sum(rast_cashew_ha)
ex_pistachio_ha = FunZone_sum(rast_pistachio_ha)
ex_kolanut_ha = FunZone_sum(rast_kolanut_ha)
ex_hazelnut_ha =
FunZone_sum(rast_hazelnut_ha) +ex_brazil_ha = FunZone_sum(rast_brazil_ha) +ex_areca_ha = FunZone_sum(rast_areca_ha) +ex_nutnes_ha = FunZone_sum(rast_nutnes_ha) + +# Rest +ex_anise_ha = FunZone_sum(rast_anise_ha) +ex_chili_ha = FunZone_sum(rast_chili_ha) +ex_cinnamon_ha =FunZone_sum(rast_cinnamon_ha) +ex_ginger_ha = FunZone_sum(rast_ginger_ha) +ex_hop_ha = FunZone_sum(rast_hop_ha) +ex_mate_ha = FunZone_sum(rast_mate_ha) +ex_nutmeg_ha = FunZone_sum(rast_nutmeg_ha) +ex_pepper_ha = FunZone_sum(rast_pepper_ha) +ex_peppermint_ha = FunZone_sum(rast_peppermint_ha) +ex_pyreth_ha = FunZone_sum(rast_pyreth_ha) +ex_ramie_ha = FunZone_sum(rast_ramie_ha) +ex_ruber_ha = FunZone_sum(rast_ruber_ha) +ex_spices_ha = FunZone_sum(rast_spices_ha) +ex_sugarnes_ha = FunZone_sum(rast_sugarnes_ha) +ex_vanilla_ha = FunZone_sum(rast_vanilla_ha) + +# list all treenuts +lst.ex.ha = list(ex_walnut_ha,ex_almond_ha,ex_chestnut_ha,ex_cashew_ha,ex_pistachio_ha, ex_kolanut_ha, ex_hazelnut_ha, ex_areca_ha, ex_nutnes_ha, + ex_anise_ha, ex_chili_ha, ex_cinnamon_ha, ex_ginger_ha, ex_hop_ha, ex_mate_ha, ex_nutmeg_ha, ex_pepper_ha, ex_peppermint_ha, ex_pyreth_ha, ex_ramie_ha, ex_ruber_ha, ex_spices_ha, ex_sugarnes_ha, ex_vanilla_ha) +dat_ex.ha = do.call("cbind", lst.ex.ha) # merging all tech types + +# list rest + +treenut_ha = dat_ex.ha %>% + dplyr::select(G2008_1_ID, ADM1_CODE,ADM0_CODE, ADM1_NAME, ADM0_NAME, CONTINENT, REGION, + sum.walnut_HarvestedAreaHectares, + sum.almond_HarvestedAreaHectares, + sum.chestnut_HarvestedAreaHectares, + sum.cashew_HarvestedAreaHectares, + sum.pistachio_HarvestedAreaHectares, + sum.kolanut_HarvestedAreaHectares, + sum.hazelnut_HarvestedAreaHectares, + sum.aniseetc_HarvestedAreaHectares, + sum.chilleetc_HarvestedAreaHectares, + sum.cinnamon_HarvestedAreaHectares, + sum.ginger_HarvestedAreaHectares, + sum.hop_HarvestedAreaHectares, + sum.mate_HarvestedAreaHectares, + sum.nutmeg_HarvestedAreaHectares, + sum.pepper_HarvestedAreaHectares, + sum.peppermint_HarvestedAreaHectares, + sum.pyrethrum_HarvestedAreaHectares, + sum.ramie_HarvestedAreaHectares, + sum.rubber_HarvestedAreaHectares, + sum.spicenes_HarvestedAreaHectares, + sum.sugarnes_HarvestedAreaHectares, + sum.vanilla_HarvestedAreaHectares, + geometry) %>% + group_by(ADM0_CODE, ADM1_CODE) %>% + mutate(treenut_ha = sum(sum.walnut_HarvestedAreaHectares, + sum.almond_HarvestedAreaHectares, + sum.chestnut_HarvestedAreaHectares, + sum.cashew_HarvestedAreaHectares, + sum.pistachio_HarvestedAreaHectares, + sum.kolanut_HarvestedAreaHectares, + sum.hazelnut_HarvestedAreaHectares)) %>% + mutate(rest_ha = sum(sum.aniseetc_HarvestedAreaHectares, + sum.chilleetc_HarvestedAreaHectares, + sum.cinnamon_HarvestedAreaHectares, + sum.ginger_HarvestedAreaHectares, + sum.hop_HarvestedAreaHectares, + sum.mate_HarvestedAreaHectares, + sum.nutmeg_HarvestedAreaHectares, + sum.pepper_HarvestedAreaHectares, + sum.peppermint_HarvestedAreaHectares, + sum.pyrethrum_HarvestedAreaHectares, + sum.ramie_HarvestedAreaHectares, + sum.rubber_HarvestedAreaHectares, + sum.spicenes_HarvestedAreaHectares, + sum.sugarnes_HarvestedAreaHectares, + sum.vanilla_HarvestedAreaHectares)) %>% + dplyr::select(G2008_1_ID, ADM1_CODE,ADM0_CODE, ADM1_NAME, ADM0_NAME, CONTINENT, REGION, treenut_ha, rest_ha) + + + +# Joining SPAM with GAUL L ----------------------------------------------------------- +dat_spam=read_csv(here("input_data", "spam2010V2r0_global_Y_TA.csv")) +dat_spam=dplyr::select(dat_spam, x,y, alloc_key, name_adm1, iso3, name_cntr) + +# Make SPAM spatial +# projcrs <- "+proj=longlat 
+datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0" +# proj_mercator = "+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs" +proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below +dat_cent <- st_as_sf(x = dat_spam, + coords = c("x", "y"), + crs = proj) + +shape_gaul=read_sf(here("Input_data", "g2008_1.shp")) #actual GAUL map + + +# LOAD treenut production and harvested area +treenut_ha= read_sf(here("Input_data", "GAUL_treenut_rest_ha,shp", "GAUL_treenut_rest_ha.shp")) +treenut_pd= read_sf(here("Input_data", "GAUL_treenut_rest_pd,shp", "GAUL_treenut_rest_pd.shp")) +# write_sf(sph.all, dsn = here("Input_data", driver = "ESRI shapefile")) +sph.prod.tib = treenut_pd %>% + as_tibble() %>% + dplyr::select(G2008_1_ID, ADM1_CODE, ADM0_CODE, ADM0_NAME, treenut_pd, rest_pd,geometry) + +sph.harv = treenut_ha %>% + as_tibble() %>% + dplyr::select(G2008_1_ID, ADM1_CODE, ADM0_CODE, ADM0_NAME, treenut_ha, rest_ha,geometry) +# loading SPAM-GADM join + +# Creating a spatial join between the SPAM (pixel) and GAUL 2008 +SPAM_GAUL_JOIN = dat_cent %>% + st_join(., shape_gaul, join = st_within, left = T) +st_write(SPAM_GAUL_JOIN, here("Input_data", "GAUL2008_SPAM_LINK"), driver="ESRI Shapefile") # From here go to script +SPAM_GAUL_JOIN=read_sf(here("Input_data", "GAUL2008_SPAM_LINK")) + +# GAUL aggregation and transformation ------------------------------------ + +# Independent -- Merging treenuts with SPAM ------------------------------- +# LOAD data from previous step (due to memory overuse) +setwd(here('Input_data', "Rectypes_full_cropspread")) +yld_dat <- read_csv('Yield_full_cropspread.csv') +prd_dat <- read_csv('Production_full_cropspread.csv') +har_dat <- read_csv('Harv_area_spatial_spread_SPAM.csv') +phy_dat <- read_csv('Phys_area_full_cropspread.csv') + +# Aggregate results to adm1 level and calculate ratio between nuts and rest +val_pd = treenut_pd %>% + group_by(ADM1_CODE, ADM0_CODE) %>% + summarise(sum_pd = sum(treenut_pd, keep = T), + sum_rest_pd = sum(rest_pd)) +write_sf(val_pd, here("Input_data", "val_pd.shp")) + +val_pd=read_sf(dsn=here("Input_data", "val_pd.shp")) +val_pd=rename(val_pd,ADM1_CODE = ADM1_CO, #when loading the data run rename function + ADM0_CODE = ADM0_CO, + sum_rest_pd = sm_rst_) + + +val_ha = treenut_ha %>% + group_by(ADM1_CODE, ADM0_CODE) %>% + summarise(sum_ha = sum(treenut_ha, keep = T), + sum_rest_ha = sum(rest_ha)) #%>% +write_sf(val_ha, here("Input_data", "val_ha.shp")) + +val_ha=read_sf(here("Input_data", "val_ha.shp")) +val_ha = rename(val_ha, + ADM1_CODE = ADM1_CO, + ADM0_CODE = ADM0_CO, + sum_rest_ha = sm_rst_) + + +# Join of harvested area and production +join_ha_pd_gaul = val_pd %>% + group_by(ADM1_CODE, ADM0_CODE) %>% + as_tibble() %>% + left_join(., val_ha, by="ADM1_CODE") %>% + left_join(., shape_gaul, by = "ADM1_CODE") %>% + rowwise() %>% + mutate(tnuts_ratio_pd = sum_pd/sum(sum_pd, sum_rest_pd), + tnuts_ratio_ha = sum_ha/sum(sum_ha, sum_rest_ha)) %>% + dplyr::select(G2008_1_ID, ADM0_CODE, ADM0_NAME, ADM0_NAME, ADM1_NAME, CONTINENT, REGION, + sum_pd, sum_rest_pd, sum_ha, sum_rest_ha, tnuts_ratio_pd, tnuts_ratio_ha, geometry) #%>% +st_write(., here("Input_data", "GAUL_tnuts_rest_ratio_ha_pd"), driver="ESRI Shapefile") # From here go to script +join_ha_pd_gaul=read_sf(here("Input_data", "GAUL_tnuts_rest_ratio_ha_pd")) +join_ha_pd_gaul=join_ha_pd_gaul %>% as_tibble() %>% write_csv(here("Input_data", "join_ha_pd_gaul.csv")) # strange transformation. 
Creates weird ratios in the csv file - possibly ESRI's 10-character field-name limit abbreviating the ratio columns on the shapefile round-trip.
#
# # Production
# # Join of harvested area and production
# join_pd_gaul = val_pd %>%
#   group_by(ADM1_CODE, ADM0_CODE) %>%
#   as_tibble() %>%
#   # left_join(., val_ha, by="ADM1_CODE") %>%
#   left_join(., shape_gaul, by = "ADM1_CODE") %>%
#   rowwise() %>%
#   mutate(tnuts_ratio_pd = sum_pd/sum(sum_pd, sum_rest_pd)) %>% #,
#   # tnuts_ratio_ha = sum_ha/sum(sum_ha, sum_rest_ha)) %>%
#   dplyr::select(G2008_1_ID, ADM0_CODE.x, ADM0_NAME, ADM1_NAME, CONTINENT, REGION,
#                 sum_pd, sum_rest_pd,
#                 # sum_ha, sum_rest_ha,
#                 tnuts_ratio_pd,
#                 # tnuts_ratio_ha,
#                 geometry.y) %>%
#   st_write(., here("Input_data", "GAUL_tnuts_rest_ratio_pd"), driver="ESRI Shapefile") # From here go to script
# join_pd_gaul=read_sf(here("Input_data", "GAUL_tnuts_rest_ratio_ha_pd"))
#
# # Harvested area
# join_ha_gaul = val_ha %>%
#   group_by(ADM1_CODE, ADM0_CODE) %>%
#   as_tibble() %>%
#   # left_join(., val_ha, by="ADM1_CODE") %>%
#   left_join(., shape_gaul, by = "ADM1_CODE") %>%
#   rowwise() %>%
#   mutate(
#     # tnuts_ratio_pd = sum_pd/sum(sum_pd, sum_rest_pd),
#     tnuts_ratio_ha = sum_ha/sum(sum_ha, sum_rest_ha)) %>%
#   dplyr::select(G2008_1_ID, ADM0_CODE.x, ADM0_NAME, ADM1_NAME, CONTINENT, REGION,
#                 # sum_pd, sum_rest_pd,
#                 sum_ha, sum_rest_ha,
#                 # tnuts_ratio_pd,
#                 tnuts_ratio_ha, geometry.y)# %>%
#   st_write(., here("Input_data", "GAUL_tnuts_rest_ratio_ha"), driver="ESRI Shapefile") # From here go to script
#
# join_ha_gaul=read_sf(here("Input_data", "GAUL_tnuts_rest_ratio_ha_ha"))
#

# Treenuts GAUL pixel ---------------------------------------------------------------
# Treenuts are loaded from the treenuts script
# Production
# # rbenchmark::benchmark(
# prod_dat_gaul_spam = SPAM_GAUL_JOIN %>%
#   as_tibble() %>%
#   left_join(., sph.prod.tib, by="G2008_1_ID") %>%
#   left_join(., prd_dat, by="alloc_key") #%>%
#   mutate(tnuts = trnt_r_*rest) %>%
#   mutate(rest.1 = (1-trnt_r_)*rest) %>%
#   dplyr::select(alloc_key, UID, GID_0.x, GID_1.x, rec_type, tech_type, unit,
#                 whea:rest, tnuts, rest.1) %>%
#   write.csv(here("Input_data", "prod_tnuts_spam_gaul.csv"))

# c=read_csv(here("Input_data", "prod_tnuts_spam_gaul.csv"))
#
# # Approach validation > do the numbers make sense?
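# A quick sanity check on the ratio columns (a sketch; assumes join_ha_pd_gaul
# is still in memory, and that the column names survived the shapefile
# round-trip - if they were truncated to 10 characters, adjust them here).
# Both ratios are shares, so they should fall inside [0, 1]; NaN marks admin
# units with zero treenut AND zero rest production/area.
join_ha_pd_gaul %>%
  as_tibble() %>%
  dplyr::summarise(dplyr::across(c(tnuts_ratio_pd, tnuts_ratio_ha),
                                 list(min   = ~min(., na.rm = TRUE),
                                      max   = ~max(., na.rm = TRUE),
                                      n_nan = ~sum(is.nan(.)))))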
+# xy=dat_join_prod %>% +# group_by(tnuts) %>% +# top_n(n=10) %>% +# dplyr::select(rest, tnuts, rest.1) +# +# write_csv(dat_join_prod, here("Input_data", "dat_treenut_prod.csv")) +# # st_as_sf() + +# Production +# rbenchmark::benchmark( +# treenut_pd = as_tibble(join_ha_pd_gaul) %>% +# rename(G2008_1_ID=G2008_1) %>% +# left_join(.,as_tibble(SPAM_GAUL_JOIN), by="G2008_1_ID") %>% +# right_join(.,prd_dat, by="alloc_key")%>% +# dplyr::select(alloc_key, G2008_1_ID, ADM1_CODE, ADM1_NAME, ADM0_CODE, ADM0_NAME, +# name_cntr, name_adm1.x, iso3.x, rec_type, tech_type, unit, whea:rest, tnuts_ratio_pd) %>% +# rename( +# # ADM1_NAME = ADM1_NAME.x, +# # ADM0_CODE = ADM0_CODE.x, +# # ADM0_NAME = ADM0_NAME.x, +# name_adm1 = name_adm1.x, +# iso3 =iso3.x) %>% +# write.csv(here("Input_data", "prod_tnuts_spam_gaul1.csv")) + +treenut_pd = as_tibble(join_ha_pd_gaul) %>% + # rename(G2008_1_ID = G2008_1) %>% + left_join(.,as_tibble(SPAM_GAUL_JOIN), by="G2008_1_ID") %>% + right_join(., prd_dat, by="alloc_key") %>% + dplyr::select(alloc_key, G2008_1_ID, ADM1_CODE, ADM1_NAME.x, ADM0_CODE.x, ADM0_NAME.x, + name_cntr, name_adm1.x, iso3.x, rec_type, tech_type, unit, whea:rest, tnuts_ratio_pd) %>% + rename(name_adm1 = name_adm1.x, + ADM1_NAME = ADM1_NAME.x, + ADM0_CODE =ADM0_CODE.x, + ADM0_NAME =ADM0_NAME.x, + iso3 =iso3.x) %>% + write_csv(here("Input_data", "prod_tnuts_spam_gaul1.csv")) +treenut_pd = read.csv(here("Input_data","prod_tnuts_spam_gaul1.csv")) + + +# Harvested area +treenut_ha = as_tibble(join_ha_pd_gaul) %>% + # rename(G2008_1_ID = G2008_1) %>% + left_join(.,as_tibble(SPAM_GAUL_JOIN), by="G2008_1_ID") %>% + right_join(.,har_dat , by="alloc_key") %>% + dplyr::select(alloc_key, G2008_1_ID, ADM1_CODE, ADM1_NAME.x, ADM0_CODE.x, ADM0_NAME.x, + name_cntr, name_adm1.x, iso3.x, rec_type, tech_type, unit, whea:rest, tnuts_ratio_ha) %>% + rename(name_adm1 = name_adm1.x, + ADM1_NAME = ADM1_NAME.x, + ADM0_CODE =ADM0_CODE.x, + ADM0_NAME =ADM0_NAME.x, + iso3 =iso3.x) %>% + write.csv(here("Input_data", "ha_tnuts_spam_gaul1.csv")) +treenut_ha = read_csv(here("Input_data","ha_tnuts_spam_gaul1.csv")) + +# physical area +treenut_phys = as_tibble(join_ha_pd_gaul) %>% + # rename(G2008_1_ID=G2008_1) %>% + left_join(.,as_tibble(SPAM_GAUL_JOIN), by="G2008_1_ID") %>% + right_join(.,phy_dat, by="alloc_key") %>% + dplyr::select(alloc_key, G2008_1_ID, ADM1_CODE, ADM1_NAME.x, ADM0_CODE.x, ADM0_NAME.x, + name_cntr, name_adm1.x, iso3.x, rec_type, tech_type, unit, whea:rest, tnuts_ratio_ha) %>% + rename(name_adm1 = name_adm1.x, + ADM0_CODE =ADM0_CODE.x, + ADM0_NAME =ADM0_NAME.x, + ADM1_NAME = ADM1_NAME.x, + iso3 =iso3.x) %>% + write.csv(here("Input_data", "phys_tnuts_spam_gaul1.csv")) +treenut_phys = read.csv(here("Input_data","phys_tnuts_spam_gaul1.csv")) + + + +# Approach: Aim is to get dinal datasets on pixel level > Adm1 level +# 1_Cbind the three datasets +# 2_Gather crops +# 3_Spread levels rec type +# 4_Calculate yields after unit transformation + + +# # GThis data contains all crops spread > Values correspond to the unaggregated SPAM dataset +dat.phys = read_csv(here("Input_data", "phys_tnuts_spam_gaul1.csv")) # this is pixel level spam +dat.prod = read_csv(here("Input_data", "prod_tnuts_spam_gaul1.csv")) # this is pixel level spam +dat.harv = read_csv(here("Input_data", "ha_tnuts_spam_gaul1.csv")) # this is pixel level spam + +# calulating treenuts and rest from share ratio +dat_prod = treenut_pd %>% mutate(tnuts = rest*tnuts_ratio_pd, + rest = rest*(1-tnuts_ratio_pd)) %>% + dplyr::select(-c(tnuts_ratio_pd)) %>% + 
write.csv(here("Input_data", "prod_tnuts_rest_spam_gaul_FINAL1.csv")) + +dat_harv = dat.harv %>% mutate(tnuts = rest*tnuts_ratio_ha, + rest = rest*(1-tnuts_ratio_ha)) %>% + dplyr::select(-c(tnuts_ratio_ha))%>% + write.csv(here("Input_data", "harv_tnuts_rest_spam_gaul_FINAL.csv")) + +dat_phys = dat.phys %>% mutate(tnuts = rest*tnuts_ratio_ha, + rest = rest*(1-tnuts_ratio_ha)) %>% + dplyr::select(-c(tnuts_ratio_ha)) %>% + write.csv(here("Input_data", "phys_tnuts_rest_spam_gaul_FINAL.csv")) + +# This data contains the final treenuts isolated from the initial rest. Rest here means now the herbs, etc +dat_phys = read_csv(here("Input_data", "phys_tnuts_rest_spam_gaul_FINAL.csv")) #tnutsratios good here -checked +dat_prod = read_csv(here("Input_data", "prod_tnuts_rest_spam_gaul_FINAL1.csv")) +dat_harv = read_csv(here("Input_data", "harv_tnuts_rest_spam_gaul_FINAL.csv")) +# head(dat.phys) +# head(dat.prod) +# head(dat.harv) + +# lst.tnuts = list(dat.harv, dat.phys, dat.prod) +dat_nutsall <- do.call("rbind", list(dat_harv, dat_phys, dat_prod)) + +# Calculating the yield for nuts +# yield_nut_dat = +do.call("rbind", list(dat_harv, dat_phys, dat_prod)) %>% + dplyr::select(-c(X1)) %>% + pivot_longer(names_to = "crops", values_to = "value", cols = whea:tnuts) %>% + pivot_wider(-unit, names_from = "rec_type", + values_from = "value") %>% + mutate(Y = (P*1000/H))%>% #checked for result. We are transforming mt to kg when multiplying Production by 1000 to get unit kg/ha for yields + mutate_at(vars(Y), ~replace(., is.nan(.), 0)) %>% + # rename(c("GID_0" ="GID_0.x", "GID_1"="GID_1.x")) %>% + write_csv(here("Input_data", "pxl_rec_spreadtec_gaul_spam_tnuts.csv")) +dat.all=read_csv(here("Input_data", "pxl_rec_spread_tec_gaul_spam_tnuts.csv")) + + + + + + + + + + + + + +# +# +# +# # OLD --------------------------------------------------------------------- +# # Loading the files and calculate final ratio ----------------------------- +# wd2=setwd(here("Input_data")) +# +# sph.prod1 = read_sf(dsn = wd2, layer = "GADM_treenut_rest_pd") +# sph.harv1 = read_sf(dsn = wd2, layer = "GADM_treenut_rest_ha") +# sph.all = st_join(sph.prod, sph.harv, join = st_within) +# write_sf(sph.all, dsn = here("Input_data"), driver = "ESRI Shapefile") #shapefile is called 'Input_data' +# +# join_harv = st_join(SPAM_GADM_JOIN_GIS_left, sph.harv, join = st_intersects) +# join_harv_prod_SPAM_GADM = st_join(join_harv, sph.prod, join = st_intersects) +# +# # Joining GADM with SPAM ------------------------------------------------------- +# wd=setwd(here("Input_data")) +# +# # Load and check shapefile +# shape=read_sf(dsn = wd, layer = "gadm36") # multipolygons containing all admin levels globally +# shape=dplyr::select(shape, 'UID', 'geometry', 'GID_0', 'GID_1') +# # shape_fil=filter(shape, GID_0 == 'FIN') +# +# # Making SPAM spatial explicit +# dat_spam=read_csv(here("Input_data", "spam2010V2r0_global_Y_TA.csv")) +# dat_spam=dplyr::select(dat_spam, 'x','y', 'alloc_key') +# +# # Make SPAM spatial +# projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0" +# dat_cent <- st_as_sf(x = dat_spam, +# coords = c("x", "y"), +# crs = projcrs) +# +# class(dat_cent) +# # dat_cent = filter(df, iso3=="FIN") +# +# +# SPAM_GADM_JOIN_GIS_left = st_join(dat_cent, shape, join = st_intersects, left = T) #https://postgis.net/docs/ST_Within.html +# # head(SPAM_GADM_JOIN_GIS) +# # class(SPAM_GADM_Join) +# +# st_write(obj = SPAM_GADM_JOIN_GIS, dsn = here("Input_data"), driver = "ESRI Shapefile") +# st_write(obj = SPAM_GADM_JOIN_GIS_left, dsn = 
here("Input_data"), driver = "ESRI Shapefile") +# +# # Joining SPAM and GAUL +# +# + diff --git a/Veggie_isolation/veggies.R b/Veggie_isolation/veggies.R new file mode 100644 index 0000000000000000000000000000000000000000..904cb78263026e78371deac31282c3678f01ec62 --- /dev/null +++ b/Veggie_isolation/veggies.R @@ -0,0 +1,274 @@ +easypackages::packages(c("tidyverse", 'sf', 'raster','sp','dplyr','spatialEco', + 'exactextractr','tibble', 'here', "readr", "margrittr","tydr", + "readxl", "foreign", "data.table", "spData","spDataLarge", "purrr", "rgdal", + "ggplot2", "RColorBrewer", "ggplotify", "treemap", "treemapify")) +# Load data +v1= read_csv(here("Input_data", "FAOSTAT_data_9-28-2020_Yields_full.csv")) %>% + rename(crop_code = "Item Code") + +spm=read_csv(here("Input_data", "SPAM_crops.csv")) + +# Getting the FAO CODES of the veggies aggregated in SPAM as "Vegetables" +veg_FAO = c(358:463) +length(veg_FAO) #106 veggies are aggregated in SPAM, of which only 29 are represented here. +v2=v1 %>% filter(crop_code %in% veg_FAO) %>% write_csv(here("Input_data", "veggies.csv")) + +A = read_csv(here("Input_data", "veggies3.csv")) +n1 = read_csv(here("Input_data", "Nutri_cifos.csv"),skip=2) + +# deleting rows +n1.3= filter(n1, !(Family %in% c("Fish", "Meat", "Egg", "Dairy", "Sugar", "Nut", "Oil", "Grain", "Other", "Fruit"))) + +# Calulatin the percentage for each of the nutrients +n2.1=n1.3 %>% + # drop_na() %>% + dplyr::select(-c(Weight:Energy)) %>% + mutate_at(vars(Protein:N), + .funs = list(perc = ~ (.)/sum(.)*100)) #validated that all percentage columns add up to 100 +# %>% write_csv(here("Input_data", "perc.csv")) + +n2 <- n2.1 %>% dplyr::select(-c(Protein:N)) %>% + # gather(n2.1, "content", "value_absolute", "Weight":"N") %>% + gather(., "content_perc", "value_perc", "Protein_perc":"N_perc") %>% + dplyr::select(-c("Raw NEVO", "Cooked NEVO")) %>% + rename(usage = X3) %>% + right_join(.,v3, by = c("Product"= "CIFOS_nam")) + +vec_nutr = pull(n2, var=content_perc) %>% + unique() %>% + na.omit() + +dat_relationship_SNP$Triad_summary_geno + +# Creating vectors with subsets per group +macro = vec_nutr[c(4,5,7)] +micro = vec_nutr[c(8:length(vec_nutr))] +fibr = vec_nutr[6] +water = vec_nutr[2] +weight = vec_nutr[1] +Energy = vec_nutr[3] + +# adding factors +n3=n2 %>% + mutate(nutri_type = case_when( + content_perc %in% macro ~ "Macro_nutrients", + content_perc %in% micro ~ "Micro_nutrients", + content_perc %in% weight ~ "Weight", + content_perc %in% Energy ~ "Energy", + content_perc %in% water ~ "Water", + content_perc %in% fibr ~ "Fiber")) + +n4 = n3 %>% + filter(content_perc != "Water_perc") %>% + filter(content_perc != "Weight_perc") %>% + filter(content_perc != "NA") %>% + filter(content_perc != "Energy_perc") %>% + filter(content_perc != "B12_perc") %>% + filter(content_perc != "Cholesterol_perc") %>% + filter(content_perc != "D_perc") %>% + filter(content_perc != "DHA_perc") %>% + filter(content_perc != "EPA_perc") %>% + filter(nutri_type != "Water") %>% + filter(nutri_type != "Weight") %>% + # filter(content_perc != "NA") %>% + filter(nutri_type != "Energy") %>% + filter(nutri_type != "Fiber") + + + +# Data +data <- n4 %>% dplyr::select(value_perc, content_perc, veg_type, nutri_type) + + + +a4.4 = an4 %>% group_by(veg_type) %>% + drop_na() %>% + mutate(value_perc = (value_absolute)/sum(value_absolute)*100) #%>% + # write_csv(here("Input_data", "check4.csv")) + +# Calculates mean, sd, se and IC +n4.5 <- an4 %>% + group_by(nutri_type, veg_type) %>% #content and nutri_type are dependent + summarise( 
+ n=n(), + # mean_perc=mean(value_perc), + # sd_perc=sd(value_perc), + mean=mean(value_absolute), +sd=sd(value_absolute)) %>% + # mutate(se=sd/sqrt(n)) %>% + # mutate(ic=se * qt((1-0.05)/2 + .5, n-1)) %>% + # group_by(content, veg_type) %>% + right_join(., a4.4, by = "veg_type") %>% + # dplyr::rename(veg_type = veg_type.x) %>% + drop_na() +write_csv(n4.5, here("Input_data", "check5.csv")) + +# n5 = n4 %>% +# dplyr::select(-c("content", value_absolute)) + +# all values mixed +ggplot(a4.4, aes(x=veg_type, y=value_perc, fill=content))+ + geom_bar(stat="identity", position= "dodge", alpha=0.7)+ + facet_wrap(~nutri_type, scales = "free_y") + +# facet +ggplot(n4, aes(x=veg_type, y=value, fill=content))+ + geom_bar(stat="identity", position= "dodge", alpha=0.7) + + facet_wrap(~nutri_type, scales = "free_y")+ + theme(axis.text.x=element_text(angle = -90, hjust = 0)) + +ggplot(n5, aes(x=veg_type, y=value_perc, fill=content_perc))+ + geom_bar(stat="identity", position= "dodge", alpha=0.7) + + facet_wrap(~content_perc, scales = "free_y")+ + theme(axis.text.x=element_text(angle = -90, hjust = 0)) + +# nutrients stacked +ggplot(a4.4, aes(x=veg_type, y=value_perc, fill=content))+ + geom_bar(stat="identity", position= "dodge", alpha=0.7, show.legend = F) + + # geom_errorbar(data=n4.1, aes(x=veg_type, ymin=mean-sd , ymax=mean+sd), width=0.4, colour="black", alpha=0.9, size=1.3)+ + facet_wrap(~content, scales = "free_y")+ + theme(axis.text.x=element_text(angle = -90, hjust = 0)) + +# Nutrients averaged +ggplot(n4.5, aes(x=veg_type.x, y=value_absolute, fill=content))+ + # geom_bar(stat="identity", position= "dodge", alpha=0.7, show.legend = F) + +geom_bar(position = "dodge", stat = "summary", fun = "mean", alpha=0.7, show.legend = F)+ + geom_errorbar(aes(x=veg_type.x, ymin=mean-sd , ymax=mean+sd), width=0.4, colour="black", alpha=0.9, size=1.3)+ + facet_wrap(~content, scales = "free_y")+ + theme(axis.text.x=element_text(angle = -90, hjust = 0)) + + +# Perc -------------------------------------------------------------------- + +# avergae contribution +ggplot(an4, aes(x=veg_type, y=value_absolute, fill=content))+ + # geom_bar(stat="identity", position= "dodge", alpha=0.7, show.legend = F) + + geom_bar(position = "dodge", stat = "summary", fun = "mean", alpha=0.7, show.legend = F)+ + geom_errorbar(data=n4.5, aes(x=veg_type, ymin=mean-sd , ymax=mean+sd), width=0.4, colour="black", alpha=0.9, size=1.3)+ + facet_wrap(~content, scales = "free_y")+ + theme(axis.text.x=element_text(angle = -90, hjust = 0)) + + + +# Analyse by tables ------------------------------------------------------- +n4 %>% group_by(nutri_type, value_perc) + +write_csv(n4, here("Input_data", "check2.csv")) + + + + +# absolute -------------------------------------------------------------- +# deleting rows +an1.3= filter(n1, !(Family %in% c("Fish", "Meat", "Egg", "Dairy", "Sugar", "Nut", "Oil", "Grain", "Other", "Fruit"))) +# +# # Calulatin the percentage for each of the nutrients +# an2.1=an1.3 %>% +# # drop_na() %>% +# mutate_at(vars(Weight:N), +# .funs = list(perc = ~ (.)/sum(.)*100)) #validated that all percentage columns add up to 100 +# # %>% write_csv(here("Input_data", "perc.csv")) + +an2 <- an1.3 %>% + gather("content", "value_absolute", "Weight":"N") %>% + # gather(., "content_perc", "value_perc", "Weight_perc":"N_perc") %>% + dplyr::select(-c("Raw NEVO", "Cooked NEVO")) %>% + rename(usage = X3) %>% + right_join(.,v3, by = c("Product"= "CIFOS_nam")) + +vec_nutra = pull(an2, var=content) %>% + unique() %>% + na.omit() + +# Creating 
vectors with subsets per group +macro = vec_nutra[c(4,5,7)] +micro = vec_nutra[c(8:length(vec_nutr))] +fibr = vec_nutra[6] +water = vec_nutra[2] +weight = vec_nutra[1] +Energy = vec_nutra[3] + +# adding factors +an3=an2 %>% + mutate(nutri_type = case_when( + content %in% macro ~ "Macro_nutrients", + content %in% micro ~ "Micro_nutrients", + content %in% weight ~ "Weight", + content %in% Energy ~ "Energy", + content %in% water ~ "Water", + content %in% fibr ~ "Fiber")) + +an4 = an3 %>% + filter(content != "Water") %>% + filter(content != "Weight") %>% + filter(content != "NA") %>% + filter(content != "Energy") %>% + filter(content != "B12") %>% + filter(content != "Cholesterol") %>% + filter(content != "D") %>% + filter(content != "DHA") %>% + filter(content != "EPA") #%>% + # drop_na() + + +# Data +data <- an4 %>% dplyr::select(value_absolute, content, veg_type, nutri_type) + +# Calculates mean, sd, se and IC +an4.1 <- an4 %>% + group_by(content, veg_type) %>% + summarise( + n=n(), + mean=mean(value_absolute), + sum=sum(value_absolute), + sd=sd(value_absolute)) %>% + mutate(se=sd/sqrt(n)) %>% + mutate(ic=se * qt((1-0.05)/2 + .5, n-1)) %>% + # group_by(content, veg_type) %>% + right_join(., an4, by = "content") %>% + dplyr::rename(veg_type = veg_type.x) + + + + +# n5 = n4 %>% +# dplyr::select(-c("content", value_absolute)) + +# all values mixed +ggplot(an4.1, aes(x=nutri_type, y=value_absolute, fill=veg_type))+ + geom_bar(stat="identity", position= "dodge", alpha=0.7) + +# facet +ggplot(n4, aes(x=veg_type, y=value, fill=content))+ + geom_bar(stat="identity", position= "dodge", alpha=0.7) + + facet_wrap(~nutri_type, scales = "free_y")+ + theme(axis.text.x=element_text(angle = -90, hjust = 0)) + +ggplot(n5, aes(x=veg_type, y=value_perc, fill=content_perc))+ + geom_bar(stat="identity", position= "dodge", alpha=0.7) + + facet_wrap(~content_perc, scales = "free_y")+ + theme(axis.text.x=element_text(angle = -90, hjust = 0)) + +# nutrients stacked +ggplot(an4, aes(x=veg_type, y=mean, fill=content))+ + geom_bar(stat="identity", position= "stack", alpha=0.7, show.legend = F) + + geom_errorbar(data=an4.1, aes(x=veg_type, ymin=mean-sd , ymax=mean+sd), width=0.4, colour="black", alpha=0.9, size=1.3)+ + facet_wrap(~content, scales = "free_y")+ + theme(axis.text.x=element_text(angle = -90, hjust = 0)) + +# Nutrients averaged +ggplot(an4, aes(x=veg_type, y=value_absolute, fill=veg_type))+ + geom_bar(position = "dodge", stat = "summary", fun = "mean", alpha=0.7, show.legend = F)+ + # geom_errorbar(data=an4.1, aes(x=veg_type, ymin=mean-sd , ymax=mean+sd), width=0.4, colour="black", alpha=0.9, size=1.3)+ + theme(axis.text.x=element_text(angle = -90, hjust = 0))#+ + # facet_wrap(~content, scales = "free_y") + + + +# Analyse by tables ------------------------------------------------------- +n4 %>% group_by(nutri_type, value_perc) + +write_csv(n4, here("Input_data", "check2.csv")) + + + diff --git a/Zones/Admin1_SpatialJoin.R b/Zones/Admin1_SpatialJoin.R new file mode 100644 index 0000000000000000000000000000000000000000..769afc6958919d21b0122ebea38264662c12f4ef --- /dev/null +++ b/Zones/Admin1_SpatialJoin.R @@ -0,0 +1,122 @@ +easypackages::packages(c("tidyverse", 'sf', 'raster','sp','dplyr','spatialEco', + 'exactextractr','tibble', 'here', "readr", + "readxl", "foreign", "data.table", "rbenchmark", "benchmarkme", + "validate", "plyr")) + +# Set working directory +setwd(here::here("Input_data")) + +# Load data ---------------------------------------------------------------- +# Yields 
------------------------------------------------------------------
dat_TA_yld <- read_csv("spam2010V2r0_global_Y_TA.csv") # Yields - all technologies together, i.e. the complete crop - data downloaded on 1 September 2020
dat_TI_yld <- read_csv("spam2010V2r0_global_Y_TI.csv") # Yields - irrigated portion of crop
dat_TH_yld <- read_csv("spam2010V2r0_global_Y_TH.csv") # Yields - rainfed high-input portion of crop
dat_TL_yld <- read_csv("spam2010V2r0_global_Y_TL.csv") # Yields - rainfed low-input portion of crop
dat_TS_yld <- read_csv("spam2010V2r0_global_Y_TS.csv") # Yields - rainfed subsistence portion of crop
dat_TR_yld <- read_csv("spam2010V2r0_global_Y_TR.csv") # Yields - rainfed portion of crop (= TA - TI, or TH + TL + TS)

# Production ---------------------------------------------------------------
dat_TA_pr <- read_csv("spam2010V2r0_global_P_TA.csv")
dat_TI_pr <- read_csv("spam2010V2r0_global_P_TI.csv")
dat_TH_pr <- read_csv("spam2010V2r0_global_P_TH.csv")
dat_TL_pr <- read_csv("spam2010V2r0_global_P_TL.csv")
dat_TS_pr <- read_csv("spam2010V2r0_global_P_TS.csv")
dat_TR_pr <- read_csv("spam2010V2r0_global_P_TR.csv")

# Physical area --------------------------------------------------------------
dat_TA_pa <- read_csv("spam2010V2r0_global_A_TA.csv")
dat_TI_pa <- read_csv("spam2010V2r0_global_A_TI.csv")
dat_TH_pa <- read_csv("spam2010V2r0_global_A_TH.csv")
dat_TL_pa <- read_csv("spam2010V2r0_global_A_TL.csv")
dat_TS_pa <- read_csv("spam2010V2r0_global_A_TS.csv")
dat_TR_pa <- read_csv("spam2010V2r0_global_A_TR.csv")

# Harvested area -------------------------------------------------------------
dat_TA_ha <- read_csv("spam2010V2r0_global_H_TA.csv")
dat_TI_ha <- read_csv("spam2010V2r0_global_H_TI.csv")
dat_TH_ha <- read_csv("spam2010V2r0_global_H_TH.csv")
dat_TL_ha <- read_csv("spam2010V2r0_global_H_TL.csv")
dat_TS_ha <- read_csv("spam2010V2r0_global_H_TS.csv")
dat_TR_ha <- read_csv("spam2010V2r0_global_H_TR.csv")

# Create a spatial SPAM file
geo_spam = dplyr::select(dat_TA_pr, x, y, cell5m, name_adm1, iso3)
# %>%
#   distinct_at(vars(name_adm1))

# Make SPAM spatial
# projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
# proj_mercator = "+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below
proj_meter = "+datum=WGS84 +units=m +no_defs"
geo_spam1 <- st_as_sf(x = geo_spam,
                      coords = c("x", "y"),
                      crs = proj)

# Load the administrative unit shapefiles which match the SPAM adm1 zones
Admin0 = st_read(here::here("Input_data","GAUL_GADM_SPAM_ULRIKE", "g2008_0_now","g2008_0.shp"))
Admin1 = st_read(here::here("Input_data","GAUL_GADM_SPAM_ULRIKE", "g2008_1_now","g2008_1.shp")) # this is what we need
Admin2 = st_read(here::here("Input_data","GAUL_GADM_SPAM_ULRIKE", "g2008_2_2020_11_02","g2008_2.shp")) # this is what we need

# Define the projection for GAUL-GADM-SPAM. st_set_crs() only labels the CRS,
# it does not reproject (use st_transform() for that); the earlier
# st_as_sf(x = Admin1, crs = proj) call did neither reliably.
Admin1 <- st_set_crs(Admin1, proj)

# Joining SPAM df and GAUL-GADM-SPAM --------------------------------------

# Ulrike's comment: shape files for GAUL-GADM-SPAM.
# Please note that the columns fips0, fips1 and fips2 are the ones which have the codes for admin level 0 (country), level 1 and level 2 which can be linked to SPAM results.
+# The matching names are in columns Adm0_name, name1 and name2.# +# Columns which have names O_fips1, O_fips2, O_name1 and O_name2 refer to an “old” classification system at level 1 and level 2. Its entries differ from the columns without “O_” in some EU countries and a few others where the statistic units did not match the admin units. Please note that the shape files themselves were not modified, only the entries in the dbf tables. + +# adm1 unit match check +geo_spam2 = dat_TA_pr %>% dplyr::select(iso3, cell5m, name_cntr, name_adm1, x, y) %>% + distinct_at(vars(name_adm1), .keep_all = T) + +# Define projection for GAULGADMSPAM +proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below +proj_meter = "+datum=WGS84 +units=m +no_defs" +geo_spam3 <- st_as_sf(x = geo_spam2, + coords = c("x", "y"), + crs = proj) + + +# distinct GGS at NAM1 +adm1_distinct=Admin1 %>% as_tibble() %>% + distinct_at(vars(NAME1), .keep_all = T) %>% + write_csv(., here::here("Input_data", "GGS_adm1_dist_full.csv")) + +# Converting GGS to dataframe for testing adm1 levels in excel +GGS_df_adm1 =Admin1 %>% dplyr::select(-geometry) %>% + as_tibble() %>% + # distinct_at(vars(NAME1), .keep_all = T) %>% + write_csv(., here::here("Input_data", "GGS_adm1_full.csv")) + + +# convert to sf (spatial) +proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below +proj_meter = "+datum=WGS84 +units=m +no_defs" +adm1_distinct2 <- st_as_sf(x = adm1_distinct, + # coords = c("x", "y"), + crs = proj) + +# Spatial join +join_SPAM_adm1 = st_join(adm1_distinct2, geo_spam3, join = st_intersects) +# 2857 - 2748 = 109 is the difference between join_SPAM_adm1 and adm1_distinct2 + +# FUNcheck_incons = function(ped){ +# ped1 = ped %>% select(id, dam, sire) +# TestSire_random=ped1$sire==solution_pedsim2$Sire +# testParents= left_join(ped1, solution_pedsim2) %>% +conS_check = join_SPAM_adm1 %>% + rowwise() %>% + mutate(testAdm1 = ifelse(NAME1==name_adm1, 1,0))%>% + mutate(testAdm0 = ifelse(ADM0_NAME==name_cntr, 1,0)) %>% + as_tibble() %>% + dplyr::select(NAME1, name_adm1, testAdm1, ADM0_NAME, name_cntr, testAdm0) %>% + write_csv(., here::here("Input_data", "ADm1_0_chck.csv")) +
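# A compact summary of the consistency flags written above (a sketch; assumes
# conS_check is still in memory). testAdm1/testAdm0 are 1/0 indicators, so
# their means give the share of admin-1 and admin-0 names that agree between
# GAUL and SPAM; NAs largely come from the 109 polygons that matched no SPAM
# point in the join.
conS_check %>%
  dplyr::summarise(adm1_match_rate = mean(testAdm1, na.rm = TRUE),
                   adm0_match_rate = mean(testAdm0, na.rm = TRUE),
                   n_unmatched     = sum(is.na(testAdm1)))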