# Clear workspace: drop every object visible via ls(), then run the garbage collector
rm(list = ls())
gc()

library(tidyr)
library(devtools)
library(readr)
library(foreign)
library(tibble)
library(dplyr)
library(plyr)
library(data.table)
 
# library(tidyverse)
# Load data ----------------------------------------------------------------

# Yields ------------------------------------------------------------------

# Yield tables, one CSV per technology type - Data downloaded on 1. of September 2020
# All six files are read from the same folder, so the working directory is set once
# (note: the folder name itself ends in ".csv").
setwd('C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Raw_dat/Zonation/SPAM/2010/spam2010v2r0_global_yield2.csv')

# TA - all technologies together, i.e. the complete crop
dat_TA_yld <- as_tibble(read_csv("spam2010V2r0_global_Y_TA.csv"))
# dat_TA_yld$name_adm1 <- as.factor(dat_TA_yld$name_adm1)
# dat_TA_yld$prod_level <- as.factor(dat_TA_yld$prod_level)
# class(dat_TA_yld$prod_level)
# head(dat_tbl_yld)

# TI - irrigated portion of crop
dat_TI_yld <- as_tibble(read_csv("spam2010V2r0_global_Y_TI.csv"))

# TH - rainfed high inputs portion of crop
dat_TH_yld <- as_tibble(read_csv("spam2010V2r0_global_Y_TH.csv"))

# TL - rainfed low inputs portion of crop
dat_TL_yld <- as_tibble(read_csv("spam2010V2r0_global_Y_TL.csv"))

# TS - rainfed subsistence portion of crop
dat_TS_yld <- as_tibble(read_csv("spam2010V2r0_global_Y_TS.csv"))

# TR - rainfed portion of crop (= TA - TI, or TH + TL + TS)
dat_TR_yld <- as_tibble(read_csv("spam2010V2r0_global_Y_TR.csv"))



# Physical area--------------------------------
# Physical-area tables, one CSV per technology type - Data downloaded on 1. of September 2020
# All six files are read from the same folder, so the working directory is set once
# (note: the folder name itself ends in ".csv").
setwd("C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Raw_dat/Zonation/SPAM/2010/spam2010v2r0_global_phys_area2.csv")

# TA - all technologies together, i.e. the complete crop
dat_TA_pa <- as_tibble(read_csv("spam2010V2r0_global_A_TA.csv"))

# TI - irrigated portion of crop
dat_TI_pa <- as_tibble(read_csv("spam2010V2r0_global_A_TI.csv"))

# TH - rainfed high inputs portion of crop
dat_TH_pa <- as_tibble(read_csv("spam2010V2r0_global_A_TH.csv"))

# TL - rainfed low inputs portion of crop
dat_TL_pa <- as_tibble(read_csv("spam2010V2r0_global_A_TL.csv"))

# TS - rainfed subsistence portion of crop
dat_TS_pa <- as_tibble(read_csv("spam2010V2r0_global_A_TS.csv"))

# TR - rainfed portion of crop (= TA - TI, or TH + TL + TS)
dat_TR_pa <- as_tibble(read_csv("spam2010V2r0_global_A_TR.csv"))



# Harvesting area----------------------------------
# Harvested-area tables, one CSV per technology type - Data downloaded on 1. of September 2020
# All six files are read from the same folder, so the working directory is set once
# (note: the folder name itself ends in ".csv").
setwd('C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Raw_dat/Zonation/SPAM/2010/spam2010v2r0_global_harv_area2.csv')

# TA - all technologies together, i.e. the complete crop
dat_TA_ha <- as_tibble(read_csv("spam2010V2r0_global_H_TA.csv"))

# TI - harvested area, irrigated portion of crop
dat_TI_ha <- as_tibble(read_csv("spam2010V2r0_global_H_TI.csv"))

# TH - harvested area, rainfed high inputs portion of crop
dat_TH_ha <- as_tibble(read_csv("spam2010V2r0_global_H_TH.csv"))

# TL - harvested area, rainfed low inputs portion of crop
dat_TL_ha <- as_tibble(read_csv("spam2010V2r0_global_H_TL.csv"))

# TS - harvested area, rainfed subsistence portion of crop
dat_TS_ha <- as_tibble(read_csv("spam2010V2r0_global_H_TS.csv"))

# TR - harvested area, rainfed portion of crop (= TA - TI, or TH + TL + TS)
dat_TR_ha <- as_tibble(read_csv("spam2010V2r0_global_H_TR.csv"))

# dat_TR_yld[1:15,]

# Functions - Yld ---------------------------------------------------------------
# Renaming function: overwrite the names of columns 10 through 51 of `x`
# with the 42 short crop codes below (assigned purely by position) and
# return the renamed object.
# NOTE(review): assumes columns 10-51 hold the crop columns in exactly this
# order - verify against the layout of the raw SPAM files.
ChangeNames <- function(x) {
  crop_codes <- c(
    'whea', 'rice', 'maiz', 'barl', 'pmil', 'smil', 'sorg', 'ocer', 'pota', 'swpo',
    'yams', 'cass', 'orts', 'bean', 'chic', 'cowp', 'pige', 'lent', 'opul', 'soyb',
    'grou', 'cnut', 'oilp', 'sunf', 'rape', 'sesa', 'ooil', 'sugc', 'sugb', 'cott',
    'ofib', 'acof', 'rcof', 'coco', 'teas', 'toba', 'bana', 'plnt', 'trof', 'temf',
    'vege', 'rest'
  )
  col_names <- names(x)
  col_names[10:51] <- crop_codes
  names(x) <- col_names
  x
}
# Reshape wide -> long: every column in the range whea:rest is stacked into a
# key column `crop` (the former column name) and a value column `valu`.
GatherFun <- function(k) {
  tidyr::gather(k, key = 'crop', value = 'valu', whea:rest)
}

# For yield: one row per (name_adm1, crop) holding the mean of the non-zero
# `valu` entries of that group. The result column is unnamed in the summary
# function, so ddply() labels it `V1`.
AggregateFun_mean <- function(z) {
  nonzero_mean <- function(grp) {
    vals <- grp$valu
    mean(vals[vals != 0])
  }
  plyr::ddply(z, c("name_adm1", "crop"), nonzero_mean)
}

# For area: one row per (name_adm1, crop) holding the sum of `valu` over that
# group. The result column is unnamed in the summary function, so ddply()
# labels it `V1`.
AggregateFun_sum <- function(g) {
  group_total <- function(grp) {
    sum(grp$valu)
  }
  plyr::ddply(g, c("name_adm1", "crop"), group_total)
}

# Keep only the first row for each (name_adm1, crop) combination, retaining
# all other columns of that first row.
CollapseFun <- function(h) {
  dplyr::distinct(h, name_adm1, crop, .keep_all = TRUE)
}

# Full (outer) join of `d` and `s` on the key columns name_adm1 and crop.
CombineFun <- function(d, s) {
  join_keys <- c("name_adm1", 'crop')
  dplyr::full_join(x = d, y = s, by = join_keys)
}

# Reduce a joined aggregate table to the columns used downstream.
#
# `l` is expected to carry the aggregate produced by ddply() in column `V1`
# plus the descriptive columns of the source table (iso3, name_cntr, ...).
#
# Fix: the previous version renamed `V1` to `value` but then selected `valu`,
# i.e. the raw value of the first row per group, so the aggregate was silently
# discarded. The aggregate is now what ends up in the output column `valu`.
#
# Returns `l` restricted to: cntr (renamed from iso3), name_cntr, name_adm1,
# alloc_key, rec_type, tech_type, crop, unit, valu - in that order.
CleanFun <- function(l) {
  required <- c("V1", "iso3", "name_cntr", "name_adm1", "alloc_key",
                "rec_type", "tech_type", "crop", "unit")
  absent <- setdiff(required, names(l))
  if (length(absent) > 0) {
    stop("CleanFun: missing column(s): ", paste(absent, collapse = ", "),
         call. = FALSE)
  }

  # Overwrite (or create) `valu` with the aggregated value
  l[["valu"]] <- l[["V1"]]
  names(l)[names(l) == "iso3"] <- "cntr"

  keep <- c("cntr", "name_cntr", "name_adm1", "alloc_key", "rec_type",
            "tech_type", "crop", "unit", "valu")
  l[keep]
}

# Full yield pipeline for ONE raw table `d`:
#   1. ChangeNames       - rename columns 10:51 to the short crop codes
#   2. GatherFun         - reshape the crop columns to long form (crop, valu)
#   3. AggregateFun_mean - mean of the non-zero values per (name_adm1, crop)
#   4. CollapseFun       - first row per (name_adm1, crop), for the descriptive columns
#   5. CombineFun        - join the aggregate onto those descriptive rows
#   6. CleanFun          - rename / keep the output columns
#
# Fix: the previous version ignored `d` and always processed the global
# `dat_TA_yld`, so every technology type produced the same (TA) result.
#
# Returns the cleaned table for `d`.
TransformFun_yld <- function(d) {
  renamed <- ChangeNames(d)
  long <- GatherFun(renamed)
  group_means <- AggregateFun_mean(long)
  group_rows <- CollapseFun(long)
  combined <- CombineFun(group_means, group_rows)
  CleanFun(combined)
}

# Full area pipeline for ONE raw table `d` (physical or harvested area):
#   1. ChangeNames      - rename columns 10:51 to the short crop codes
#   2. GatherFun        - reshape the crop columns to long form (crop, valu)
#   3. AggregateFun_sum - sum of the values per (name_adm1, crop)
#   4. CollapseFun      - first row per (name_adm1, crop), for the descriptive columns
#   5. CombineFun       - join the aggregate onto those descriptive rows
#   6. CleanFun         - rename / keep the output columns
#
# Fix: the previous version ignored `d` and always processed the global
# `dat_TA_pa`, so every technology type - and the harvested-area inputs as
# well - produced the physical-area TA result.
#
# Returns the cleaned table for `d`.
TransformFun_area <- function(d) {
  renamed <- ChangeNames(d)
  long <- GatherFun(renamed)
  group_sums <- AggregateFun_sum(long)
  group_rows <- CollapseFun(long)
  combined <- CombineFun(group_sums, group_rows)
  CleanFun(combined)
}


# rm(dat_TS_yld)

# Run the yield transformation once per technology-type table
# (order: TA, TH, TI, TL, TR, TS), keep each result under its own name,
# stack them row-wise and write the stacked table to disk.
lst.yld <- lapply(
  list(dat_TA_yld, dat_TH_yld, dat_TI_yld, dat_TL_yld, dat_TR_yld, dat_TS_yld),
  TransformFun_yld
)
TA_yld <- lst.yld[[1]]
TH_yld <- lst.yld[[2]]
TI_yld <- lst.yld[[3]]
TL_yld <- lst.yld[[4]]
TR_yld <- lst.yld[[5]]
TS_yld <- lst.yld[[6]]
dat_yld <- do.call("rbind", lst.yld) # merging all tech types
write.csv(
  dat_yld,
  'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/Yield_fixd.csv',
  row.names = TRUE
)

# Run the area transformation once per physical-area technology-type table
# (order: TA, TH, TI, TL, TR, TS), keep each result under its own name,
# stack them row-wise and write the stacked table to disk.
lst.phy <- lapply(
  list(dat_TA_pa, dat_TH_pa, dat_TI_pa, dat_TL_pa, dat_TR_pa, dat_TS_pa),
  TransformFun_area
)
TA_phy <- lst.phy[[1]]
TH_phy <- lst.phy[[2]]
TI_phy <- lst.phy[[3]]
TL_phy <- lst.phy[[4]]
TR_phy <- lst.phy[[5]]
TS_phy <- lst.phy[[6]]
dat_phy <- do.call("rbind", lst.phy) # merging all tech types
write.csv(
  dat_phy,
  'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/Physical_area_fixd.csv',
  row.names = TRUE
)

# Run the area transformation once per harvested-area technology-type table
# (order: TA, TH, TI, TL, TR, TS), keep each result under its own name,
# stack them row-wise and write the stacked table to disk.
lst.har <- lapply(
  list(dat_TA_ha, dat_TH_ha, dat_TI_ha, dat_TL_ha, dat_TR_ha, dat_TS_ha),
  TransformFun_area
)
TA_har <- lst.har[[1]]
TH_har <- lst.har[[2]]
TI_har <- lst.har[[3]]
TL_har <- lst.har[[4]]
TR_har <- lst.har[[5]]
TS_har <- lst.har[[6]]
dat_har <- do.call("rbind", lst.har) # merging all tech types
write.csv(
  dat_har,
  'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/Harvested_area_fixd.csv',
  row.names = TRUE
)


# Re-load the three intermediate CSVs written by the previous steps
# (done via disk because of memory overuse) ...
setwd('C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat')
yld_dat <- read_csv('Yield_fixd.csv')
har_dat <- read_csv('Harvested_area_fixd.csv')
phy_dat <- read_csv('Physical_area_fixd.csv')

# ... and stack them (yield, harvested area, physical area) into one tidy
# table holding all record types and tech types
list.comb <- list(yld_dat, har_dat, phy_dat)
dat.comb <- do.call(rbind, list.comb)
# write.csv(dat.comb, 'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/dat_comb_tidy.csv', row.names=T)

# Lookup sheets - all three are read from the same input folder, so the
# working directory is set once.
setwd('C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Input_dat')

# crop usage sheet
food_cat <- read_csv('foodnonfood.csv')

# crop name sheet
nam_cat <- read_csv('crop_names.csv')

# FAO country codes, reduced to the five columns used in the joins below
FAOcntr <- read_csv('FAO_countries.csv')
FAOcntr <- FAOcntr %>%
  select('ISO3', 'FAOSTAT_CODE', "GAUL_CODE", 'Continent', 'Subcontinent')

# Align the key column names before joining. Columns are addressed by
# position: in dat.comb column 1 is the row-name column added by
# write.csv(row.names = T), so column 2 is the country code and column 8 the
# crop code; in nam_cat the crop code is taken to be column 2.
# NOTE(review): positional renaming is fragile - confirm the layout of
# crop_names.csv.
names(dat.comb)[c(2, 8)] <- c("ISO3", "Crop_short")
names(nam_cat)[2] <- "Crop_short"

# Attach, in turn: crop usage (by crop), FAO country info (by ISO3),
# crop names (by crop). Left joins keep every row of dat.comb.
join.use <- dat.comb %>% left_join(food_cat, by = "Crop_short")
join_FAOcntr <- join.use %>% left_join(FAOcntr, by = "ISO3")
SPAM_dat_tidy <- join_FAOcntr %>% left_join(nam_cat, by = 'Crop_short')


# Rename and reorder dataset columns.
# Steps: rename the crop/usage columns, drop `No. crt.` and `ID`, sort rows by
# name_adm1, SPAMcrop_short, rec_type and tech_type, then return the columns
# in the fixed order given by `col_order`. The result is returned invisibly.
Clean2Fun <- function(x) {
  renamed <- dplyr::rename(
    x,
    SPAMcrop_short = Crop_short,
    SPAMcrop_long = `SPAM long name`,
    use = Usage
  )
  trimmed <- dplyr::select(renamed, -c(`No. crt.`, ID))
  # NOTE(review): `arrange` is deliberately left unqualified so it resolves
  # exactly as before; plyr is attached after dplyr at the top of this file,
  # so this is presumably plyr's arrange - confirm which one is intended.
  sorted <- arrange(trimmed, name_adm1, SPAMcrop_short, rec_type, tech_type)
  col_order <- c('ISO3', 'name_cntr', 'name_adm1', 'alloc_key', 'FAOSTAT_CODE', 'GAUL_CODE',
                 'SPAMcrop_short', 'Continent', 'Subcontinent', 'SPAMcrop_long', 'FAONAMES',
                 'FAOCODE', 'GROUP', 'use', 'rec_type', 'tech_type', 'unit', 'valu')
  invisible(sorted[col_order])
}
SPAM_dat_tidy <- Clean2Fun(SPAM_dat_tidy)

# Save file - Option 1: long dataset (one row per record type / tech type)
write.csv(
  SPAM_dat_tidy,
  'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/Yld_tidy_all.csv',
  row.names = TRUE
)

# Option 2 - wide layout: yield, physical area and harvested area side by side.
# Drops `unit`, spreads `valu` into one column per `rec_type` level, renames
# the resulting Y / A / H columns, and returns the columns in the fixed order
# given by `col_order`. The result is returned invisibly.
SprdFun <- function(x) {
  without_unit <- dplyr::select(x, -c(unit))
  wide <- spread(without_unit, rec_type, valu)
  wide <- dplyr::rename(
    wide,
    yield_kgha = Y,
    phys_area_ha = A,
    harv_area_ha = H
  )
  col_order <- c('Continent', 'Subcontinent', 'ISO3', 'name_cntr', 'name_adm1', 'alloc_key',
                 'FAOSTAT_CODE', 'GAUL_CODE', 'SPAMcrop_short', 'SPAMcrop_long', 'FAONAMES',
                 'FAOCODE', 'GROUP', 'use', 'tech_type', 'phys_area_ha', 'harv_area_ha',
                 'yield_kgha')
  invisible(wide[col_order])
}

# Build the wide table and write it to disk.
# Fix: write.csv() previously referenced `sprd_dat_fixd`, which is never
# defined in this script (the workspace is cleared at the top), so the call
# failed with "object not found". The object created here is `sprd_dat`.
sprd_dat <- SprdFun(SPAM_dat_tidy)
write.csv(
  sprd_dat,
  'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat/SPAMspread_all_fixd.csv',
  row.names = TRUE
)

