# Clear the workspace: remove every object visible to ls() in the current
# environment, then run the garbage collector.
# NOTE(review): when the script is sourced into an interactive session this
# also deletes objects the user created before running it.
rm(list=ls())
gc()

library(tidyr)
library(devtools)
library(readr)
library(foreign)
library(tibble)
library(dplyr)
library(plyr)
library(data.table)
library(readr)
library(tidyr)

# library(tidyverse)
# Load data ----------------------------------------------------------------

# Yields ------------------------------------------------------------------

# SPAM 2010 v2r0 raw data - downloaded on 1 September 2020.
#
# The files are read through absolute paths built with file.path(), so the
# working directory is never changed while loading. Each variable lives in its
# own folder (the folder names themselves end in ".csv") and has one file per
# technology type:
#   TA = all technologies together, i.e. the complete crop
#   TI = irrigated portion of crop
#   TH = rainfed high inputs portion of crop
#   TL = rainfed low inputs portion of crop
#   TS = rainfed subsistence portion of crop
#   TR = rainfed portion of crop (= TA - TI, or TH + TL + TS)
spam_raw_dir <- "C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Raw_dat/Zonation/SPAM/2010"

# Read one SPAM file as a tibble.
#   folder    - sub-folder of spam_raw_dir that holds the variable
#   var_code  - variable letter used in the file name ("Y", "A" or "H")
#   tech_type - technology code used in the file name ("TA", "TI", ...)
read_spam <- function(folder, var_code, tech_type) {
  file_name <- paste0("spam2010V2r0_global_", var_code, "_", tech_type, ".csv")
  as_tibble(read_csv(file.path(spam_raw_dir, folder, file_name)))
}

# Yields ------------------------------------------------------------------
yld_dir <- "spam2010v2r0_global_yield2.csv"
dat_TA_yld <- read_spam(yld_dir, "Y", "TA")
dat_TI_yld <- read_spam(yld_dir, "Y", "TI")
dat_TH_yld <- read_spam(yld_dir, "Y", "TH")
dat_TL_yld <- read_spam(yld_dir, "Y", "TL")
dat_TS_yld <- read_spam(yld_dir, "Y", "TS")
dat_TR_yld <- read_spam(yld_dir, "Y", "TR")

# Physical area -----------------------------------------------------------
pa_dir <- "spam2010v2r0_global_phys_area2.csv"
dat_TA_pa <- read_spam(pa_dir, "A", "TA")
dat_TI_pa <- read_spam(pa_dir, "A", "TI")
dat_TH_pa <- read_spam(pa_dir, "A", "TH")
dat_TL_pa <- read_spam(pa_dir, "A", "TL")
dat_TS_pa <- read_spam(pa_dir, "A", "TS")
dat_TR_pa <- read_spam(pa_dir, "A", "TR")

# Harvested area ----------------------------------------------------------
ha_dir <- "spam2010v2r0_global_harv_area2.csv"
dat_TA_ha <- read_spam(ha_dir, "H", "TA")
dat_TI_ha <- read_spam(ha_dir, "H", "TI")
dat_TH_ha <- read_spam(ha_dir, "H", "TH")
dat_TL_ha <- read_spam(ha_dir, "H", "TL")
dat_TS_ha <- read_spam(ha_dir, "H", "TS")
dat_TR_ha <- read_spam(ha_dir, "H", "TR")

# dat_TR_yld[1:15,]

# Functions - shared by yield, physical area and harvested area -----------------
ChangeNames <- function(x) {
  # Overwrite the names of columns 10 to 51 of `x` with the 42 short crop
  # codes below (in this order) and hand back the renamed object.
  # All other column names are left untouched.
  crop_codes <- c(
    'whea', 'rice', 'maiz', 'barl', 'pmil', 'smil', 'sorg', 'ocer',
    'pota', 'swpo', 'yams', 'cass', 'orts',
    'bean', 'chic', 'cowp', 'pige', 'lent', 'opul',
    'soyb', 'grou', 'cnut', 'oilp', 'sunf', 'rape', 'sesa', 'ooil',
    'sugc', 'sugb', 'cott', 'ofib',
    'acof', 'rcof', 'coco', 'teas', 'toba',
    'bana', 'plnt', 'trof', 'temf', 'vege', 'rest'
  )
  crop_positions <- 10:51
  names(x)[crop_positions] <- crop_codes
  x
}
GatherFun <- function(k){
  # Reshape wide to long: every column from `whea` through `rest` is stacked
  # into a key column `crop` (former column name) and a value column `valu`.
  gather(k, key = 'crop', value = 'valu', whea:rest)
}

AggregateFun <- function(z){ #for yield
  # Mean of `valu` for every name_adm1 x crop combination in `z`.
  # The mean is taken without na.rm, so a group containing NA yields NA.
  group_mean <- function(grp) mean(grp$valu)
  ddply(z, .(name_adm1, crop), group_mean)
}

CollapseFun <- function(h){
  # Keep the first row of every name_adm1 x crop combination, retaining all
  # other columns of that row.
  dplyr::distinct(h, name_adm1, crop, .keep_all = TRUE)
}

CombineFun <- function(d,s){
  # Full join of `d` (left) and `s` (right) on the admin-1 name and the crop.
  join_keys <- c("name_adm1", 'crop')
  full_join(x = d, y = s, by = join_keys)
}

CleanFun <- function(l){
  # Reduce the joined table to the output columns.
  #
  # `l` is expected to carry both `V1` (the aggregated value) and `valu` (the
  # value of the single row that was kept per group). Previously `V1` was
  # renamed to `value` and then dropped by the select, so the output `valu`
  # was the value of one arbitrary cell instead of the aggregate. The
  # aggregate is now what is returned under the name `valu`; the output
  # column names are unchanged.
  #
  # Returns the columns cntr (renamed from iso3), name_cntr, name_adm1,
  # alloc_key, rec_type, tech_type, crop, unit and valu.
  l %>%
    dplyr::select(-valu) %>% # drop the cell-level value so V1 can take its name
    dplyr::rename(
      valu = V1,
      cntr = iso3) %>%
    dplyr::select(cntr, name_cntr, name_adm1, alloc_key, rec_type, tech_type, crop, unit, valu)
}

TransformFun <- function(d){
  # Run the full reshaping pipeline on one SPAM table `d` and return the
  # cleaned result.
  #
  # The previous version ignored `d` and always processed the global
  # `dat_TA_pa`, so every call returned the same table. It also ended in an
  # assignment, which made the result invisible.
  #
  # Steps:
  #   1. ChangeNames  - set the crop column names (columns 10 to 51)
  #   2. GatherFun    - reshape the crop columns to long format (crop, valu)
  #   3. AggregateFun - mean of valu per name_adm1 x crop
  #   4. CollapseFun  - first row per name_adm1 x crop, all columns kept
  #   5. CombineFun   - full join of 3 and 4 on name_adm1 and crop
  #   6. CleanFun     - rename and keep the output columns
  renamed <- ChangeNames(d)
  long <- GatherFun(renamed)
  group_means <- AggregateFun(long)
  group_rows <- CollapseFun(long)
  combined <- CombineFun(group_means, group_rows)
  CleanFun(combined)
}

# rm(dat_TS_yld)

# Folder that receives the three aggregated csv files.
out_dir <- 'C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Spatial_mod/Output_dat'

# Applying transformation to all yield tech types
TA_yld <- TransformFun(dat_TA_yld)
TH_yld <- TransformFun(dat_TH_yld)
TI_yld <- TransformFun(dat_TI_yld)
TL_yld <- TransformFun(dat_TL_yld)
TR_yld <- TransformFun(dat_TR_yld)
TS_yld <- TransformFun(dat_TS_yld)
lst.yld <- list(TA_yld, TH_yld, TI_yld, TL_yld, TR_yld, TS_yld)
dat_yld <- do.call("rbind", lst.yld) # merging all tech types
write.csv(dat_yld, file.path(out_dir, 'Yield.csv'), row.names = TRUE)

# Applying transformation to all physical area tech types
TA_phy <- TransformFun(dat_TA_pa)
TH_phy <- TransformFun(dat_TH_pa)
TI_phy <- TransformFun(dat_TI_pa)
TL_phy <- TransformFun(dat_TL_pa)
TR_phy <- TransformFun(dat_TR_pa)
TS_phy <- TransformFun(dat_TS_pa)
lst.phy <- list(TA_phy, TH_phy, TI_phy, TL_phy, TR_phy, TS_phy)
dat_phy <- do.call("rbind", lst.phy) # merging all tech types
write.csv(dat_phy, file.path(out_dir, 'Physical_area.csv'), row.names = TRUE)

# Applying transformation to all harvested area tech types
TA_har <- TransformFun(dat_TA_ha)
TH_har <- TransformFun(dat_TH_ha)
TI_har <- TransformFun(dat_TI_ha)
TL_har <- TransformFun(dat_TL_ha)
TR_har <- TransformFun(dat_TR_ha)
TS_har <- TransformFun(dat_TS_ha)
lst.har <- list(TA_har, TH_har, TI_har, TL_har, TR_har, TS_har)
dat_har <- do.call("rbind", lst.har) # merging all tech types
write.csv(dat_har, file.path(out_dir, 'Harvested_area.csv'), row.names = TRUE)


# Crop usage --------------------------------------------------------------
# Switch the working directory to the SPAM 2010 raw-data folder and read
# foodnonfood.csv from it with base read.csv().
# The working directory stays changed afterwards; the relative
# write.dbf(df_all, 'df_all.dbf') call further down therefore writes here.
setwd('C:/Users/molch/OneDrive - Wageningen University & Research/PhD_WJS/Academic/RQ1/Data_analysis/Raw_dat/Zonation/SPAM/2010')
food_cat <- read.csv('foodnonfood.csv')

























# Tidy up data  -----------------------------------------------------------
# Yield
# Collect the six raw yield tables in one list and rebuild the crop-code vector.
yld_dat <- list(dat_TA_yld, dat_TI_yld, dat_TH_yld, dat_TL_yld, dat_TS_yld, dat_TR_yld)
crp <- c('whea', 'rice', 'maiz', 'barl', 'pmil', 'smil', 'sorg', 'ocer', 'pota', 'swpo', 'yams', 'cass', 'orts', 'bean', 'chic', 'cowp', 'pige', 'lent', 'opul', 'soyb',
         'grou', 'cnut', 'oilp', 'sunf', 'rape', 'sesa', 'ooil', 'sugc', 'sugb', 'cott', 'ofib', 'acof', 'rcof', 'coco', 'teas', 'toba', 'bana', 'plnt', 'trof', 'temf', 'vege', 'rest')

  
# NOTE(review): `h` is not assigned anywhere in the visible script, so this
# line fails when the file is run top to bottom.
j <- do.call("rbind", h) #merge all dfs from list to one df by rowbind

  
# Rename the crop columns of every table, reshape each to long format and
# stack them. This overwrites the `dat_yld` that was written to Yield.csv above.
renam_yld <- lapply(yld_dat, ChangeNames) #apply renaming to all the dfs in list
renam_yld <- lapply(renam_yld, GatherFun)
dat_yld <- do.call("rbind", renam_yld) #merge all dfs from list to one df by rowbind
# NOTE(review): `AggreagateFun` is not defined; the helper defined above is
# spelled `AggregateFun`. That helper groups by `crop` and averages `valu`,
# which the un-gathered tables in `yld_dat` presumably do not have - verify
# whether `renam_yld` was meant here.
test <- lapply(yld_dat, AggreagateFun)


# Per-column admin-1 means via ave(), one pass per column index 10..51.
# NOTE(review): `c` holds a single column name, so `c[i]` is NA for i >= 10,
# rank() of that NA is NA, and the target column is "ADM1meanNA" on every
# pass (each iteration overwrites the previous one) - TODO confirm.
# NOTE(review): `dat_yld` is the gathered (long) table at this point;
# presumably it no longer has crop columns at positions 10:51 - verify.
x <- c(10:51)
for (i in x) {
  a <- with(dat_yld, ave(dat_yld[[i]], dat_yld$name_adm1))
  c <- c(colnames(dat_yld[i]))
  r <- rank(c[i])
  dat_yld[,paste0("ADM1mean",c[r])] <- a
}


# Reshape columns 10 to 51 of dat_yld into crop / valu pairs.
# NOTE(review): dat_yld was already gathered by GatherFun above, so this
# second gather presumably targets columns that no longer exist - verify.
dat_gat <- dat_yld %>% 
  gather(crop, valu, 10:51) #gather crop and yields down


# NOTE(review): `summarise = mean(...)` is an assignment whose left-hand side
# is the pipe expression, not a call to summarise(); this line errors at
# run time. The grouped mean appears to be what the next statement computes.
dat_yld %>% group_by(dat_yld$name_adm1) %>% summarise = mean(dat_yld$valu)
  
# Mean of valu per name_adm1.
# NOTE(review): plyr is attached after dplyr at the top of the file, so the
# unqualified summarise() presumably resolves to plyr::summarise, which would
# not honour group_by() - verify, or call dplyr::summarise explicitly.
  dat_gat2 <- dat_yld %>% 
  group_by(name_adm1) %>% 
  summarise(valu= mean(valu))

# Interactive checks: the first prints the class of the valu column, the
# second only prints the definition of the base function `class`.
class(dat_gat$valu)
class

# aggregate for admin level 1
x <- c(10:51) #indicating which columns to take 10=Wheat to 51=Rest (to check: colnames(dat_tbl[50]))
# Disabled variants of the loop below for country and admin level 2:
# yields avg - countries
# for (i in x) {
#   a <- with(dat_tbl, ave(dat_tbl[[i]], dat_tbl$NAME_CNTR))
#   c <- c(colnames(dat_tbl[i]))
#   r <- rank(c[i])
#   dat_tbl[,paste0("CNTRYmean",c[r])] <- a
# }
# 
# # # yields avg - subnational level 2
# for (i in x) {
#   a <- with(dat_tbl, ave(dat_tbl[[i]], dat_tbl$NAME_ADM2))
#   c <- c(colnames(dat_tbl[i]))
#   r <- rank(c[i])
#   dat_tbl[,paste0("ADM2mean",c[r])] <- a
# }

# yields avg - subnational level 1
# For each column index in x, ave() gives the mean of that column within each
# name_adm1 group (repeated on every row) and the result is stored as a new
# column of dat_yld.
# NOTE(review): `c` is a length-1 vector, so `c[i]` is NA, rank(NA) is NA and
# the new column is named "ADM1meanNA" on every pass - TODO confirm and name
# the column from colnames(dat_yld)[i] instead.
for (i in x) {
  a <- with(dat_yld, ave(dat_yld[[i]], dat_yld$name_adm1))
  c <- c(colnames(dat_yld[i]))
  r <- rank(c[i])
  dat_yld[,paste0("ADM1mean",c[r])] <- a
}

# Removing duplicate based on admin borders -------------------------------
# Keep the first row per country / admin-1 / admin-2 name, with all columns.
# NOTE(review): `dat_tbl` is not created anywhere in the visible script -
# confirm which table these lines are meant to run on.
dat_uc <- dat_tbl %>% distinct(NAME_CNTR, .keep_all= TRUE)
dat_ad1 <- dat_tbl %>% distinct(NAME_ADM1, .keep_all= TRUE)
dat_ad2 <- dat_tbl %>% distinct(NAME_ADM2, .keep_all= TRUE)




# Harvested area: reshape columns 10 to 51 into crop / valu pairs.
# NOTE(review): `dat_tbl_TA_ha` is not defined in the visible script; the
# loaded harvested-area table is called `dat_TA_ha` - verify.
dat_tdy_TA_ha <- dat_tbl_TA_ha %>% gather(crop, valu, 10:51)
head(dat_tbl_TA_ha[51])

# Physical area: same reshape.
# NOTE(review): `dat_tbl_TA_pa` is not defined in the visible script; the
# loaded physical-area table is called `dat_TA_pa` - verify.
dat_tdy_TA_pa <- dat_tbl_TA_pa %>% gather(crop, valu, 10:51)

# Joining all data sets--------------------------------------------------------
# NOTE(review): `dat_aloc` is only assigned at the very end of the file, so it
# does not exist yet when this line runs top to bottom.
Join_ha_yld <- full_join(dat_aloc,dat_tbl, by="ALLOC_KEY")



# Aggregating to subnational level 1 --------------------------------------

# Yields  ------------------------------------------------------------------
# Compute means for yields (kg/ha) for admin borders 

#All technical level (TA)

# Append one group-mean column per crop column to `dat`.
#   dat       - table holding the crop columns and the grouping column
#   cols      - integer positions of the crop columns
#   group_col - name of the column that defines the groups
#   prefix    - prefix of the new columns; the crop column name is appended
# The previous loops built the new name as c[rank(c[i])] from a length-1
# vector, which is always NA, so every pass overwrote the single column
# "<prefix>NA". Each mean now gets its own column "<prefix><crop column>".
# New columns are appended at the end, so the positions in `cols` stay valid.
add_group_means <- function(dat, cols, group_col, prefix) {
  for (i in cols) {
    crop_name <- colnames(dat)[i]
    dat[[paste0(prefix, crop_name)]] <- ave(dat[[i]], dat[[group_col]])
  }
  dat
}

x <- c(10:51) #indicating which columns to take 10=Wheat to 51=Rest (to check: colnames(dat_tbl[50]))

# yields avg - countries
dat_tbl <- add_group_means(dat_tbl, x, "NAME_CNTR", "CNTRYmean")

# yields avg - subnational level 2
dat_tbl <- add_group_means(dat_tbl, x, "NAME_ADM2", "ADM2mean")

# yields avg - subnational level 1
dat_tbl <- add_group_means(dat_tbl, x, "NAME_ADM1", "ADM1mean")

# Removing duplicate based on admin borders -------------------------------
# Keep the first row per country / admin-1 / admin-2 name, with all columns.
# NOTE(review): de-duplicating on the admin name alone presumably merges
# units that share a name across countries - verify.
dat_uc <- dat_tbl %>% distinct(NAME_CNTR, .keep_all= TRUE)
dat_ad1 <- dat_tbl %>% distinct(NAME_ADM1, .keep_all= TRUE)
dat_ad2 <- dat_tbl %>% distinct(NAME_ADM2, .keep_all= TRUE)


# Export the full table as a dBase file. The path is relative, so the file is
# written into the current working directory.
df_all <- as.data.frame(dat_tbl)
write.dbf(df_all, 'df_all.dbf')


# Harvested area ----------------------------------------------------------
# Aggregate data to a sum of harvested area at admin1 level


# Result = aggregated dataset of physical area for admin level 1 


# Physical area  ----------------------------------------------------------
# Aggregate data to a sum of physical area at admin1 level



# Result = aggregated dataset of physical area for admin level 1 

# # Joining all yields, harv and physical area dfs together
# NOTE(review): `dat_aloc` is assigned only a few lines below, and `dat_tbl`
# is not created in the visible script.
Join_ha_yld <- full_join(dat_aloc,dat_tbl, by="ALLOC_KEY")

# TODO: Check data for Fertilizer, Rotat, NP modelling. How to link it with this data


# ugb --------------------------------------------------------------------
# Full join of two soil tables on SNUM.
# NOTE(review): neither `dat_FAOsoil_tbl` nor `dat_isr_tibSUM` is created in
# the visible script.
Join_FAO_ISRIC<- full_join(dat_FAOsoil_tbl ,dat_isr_tibSUM, by="SNUM")

# Change unique identifier
# Rename alloc_key to ALLOC_KEY so it matches the join key used above.
# NOTE(review): `dat_tblal` is not created in the visible script, and this
# assignment comes after the joins that already use `dat_aloc`.
dat_aloc <- dplyr::rename(dat_tblal,  ALLOC_KEY=alloc_key)
head(dat_aloc)
