diff --git a/SPAM_Data_Transformation/SPAM2010_aggregationADM1.R b/SPAM_Data_Transformation/SPAM2010_aggregationADM1.R index 884fce7276b92a27ceb51d3e30f6948cc6d18d7d..9a8095a69eae9a91cf13c8bb4796cdc7070eea2a 100644 --- a/SPAM_Data_Transformation/SPAM2010_aggregationADM1.R +++ b/SPAM_Data_Transformation/SPAM2010_aggregationADM1.R @@ -39,6 +39,9 @@ dat_TL_ha <- read_csv("spam2010V2r0_global_H_TL.csv",locale = locale(encoding = dat_TS_ha <- read_csv("spam2010V2r0_global_H_TS.csv",locale = locale(encoding = "LATIN2")) dat_TR_ha <- read_csv("spam2010V2r0_global_H_TR.csv",locale = locale(encoding = "LATIN2")) +test <- read_csv("Test_distinct.txt") + + # Create a spatial SPAM file geo_spam=dplyr::select(dat_TA_pr, x,y, cell5m, name_adm1, iso3) # %>% @@ -62,23 +65,23 @@ Admin2 = st_read(here::here("Input_data","GAUL_GADM_SPAM_ULRIKE", "g2008_2_2020_ Admin1Adj = st_read(here::here("Input_data","g2008_1_now", "g2008_1.shp"), options = "ENCODING=LATIN2")#WINDOWS-1252 # Now the values from GGS that are still not matching the SPAM2010 adm1 values are recoded -Admin1Ad2 = Admin1Adj %>% as_tibble() %>% +Admin1Adj2 = Admin1Adj %>% as_tibble() %>% mutate(NAME1 = case_when(ADM0_CODE == 196 & #Philipines NAME1 =="Autonomous region in Muslim Mindanao (ARMM)"~ "Autonomous region in Muslim Mi", TRUE ~ NAME1)) %>% mutate(NAME1 = case_when(ADM0_CODE == 196 & #Philipines NAME1 =="Region IX (Zamboanga Peninsula)" ~ "Region IX (Zamboanga Peninsula", TRUE ~ NAME1)) %>% - mutate(NAME1 = case_when(ADM0_CODE == 19 & #Azerbaijan + mutate(NAME1 = case_when(#ADM0_CODE == 19 & #Azerbaijan NAME1 =="Administrative unit not available" ~ "Administrative unit not availa", TRUE ~ NAME1)) %>% - mutate(NAME1 = case_when(ADM0_CODE == 152 & #Malawi + mutate(NAME1 = case_when(#ADM0_CODE == 152 & #Malawi NAME1 =="Area under National Administration" ~ "Area under National Administra", TRUE ~ NAME1)) %>% mutate(NAME1 = case_when(ADM0_CODE == 196 & #Philipines NAME1 =="Cordillera Administrative region (CAR)" ~ "Cordillera Administrative regi", TRUE ~ NAME1)) %>% - mutate(NAME1 = case_when(ADM0_CODE == 196 & #Philipines + mutate(NAME1 = case_when(#ADM0_CODE == 196 & #Philipines NAME1 =="Autonomous region in Muslim Mindanao (ARMM)"~ "Autonomous region in Muslim Mi", TRUE ~ NAME1)) %>% mutate(NAME1 = case_when(ADM0_CODE == 108 & #Haiti @@ -88,14 +91,10 @@ Admin1Ad2 = Admin1Adj %>% as_tibble() %>% NAME1 =="Nor Oriental" ~ "Gracias A Dios", TRUE ~ NAME1)) -# issue1: Some country like cote d ivoire are still wrongly written. Check out windows decoding. -# Issue2: Check out also where to combine the arable land. - -# ENCODING=WINDOWS-1252 # Define projection for GAULGADMSPAM proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below proj_meter = "+datum=WGS84 +units=m +no_defs" -Admin1 <- st_as_sf(x = Admin1Adj, +AdminAdj2.2 <- st_as_sf(x = Admin1Adj2, # coords = c("x", "y"), crs = proj) @@ -106,27 +105,35 @@ Admin1 <- st_as_sf(x = Admin1Adj, # The matching names are in columns Adm0_name, name1 and name2.# # Columns which have names O_fips1, O_fips2, O_name1 and O_name2 refer to an “old” classification system at level 1 and level 2. Its entries differ from the columns without “O_” in some EU countries and a few others where the statistic units did not match the admin units. Please note that the shape files themselves were not modified, only the entries in the dbf tables. -# adm1 unit match check -spam_admin_link = - dat_TA_pr%>% dplyr::select(iso3, cell5m, name_cntr, name_adm1) %>% - distinct_at(vars(name_adm1), .keep_all = T) #%>% - full_join(., Admin1Adj, by=c("name_adm1"="NAME1")) - -adm1_distinct=Admin1Adj %>% as_tibble() %>% - distinct_at(vars(NAME1), .keep_all = T) -str(Admin1) - -adm1_distinct=as_tibble(Admin1) %>% - distinct_at(vars(FIPS1))#, .keep_all = T) - -# adm2 units -adm2o_distinct=as_tibble(Admin1) %>% - distinct_at(vars(O_NAME))#, .keep_all = T) - -adm2_distinct=as_tibble(Admin1) %>% - distinct_at(vars(FIPS1))#, .keep_all = T) - -# Validation - Making sure SPAM2010 matches with admin Shapefiles +# # adm1 unit match check +# spam_admin_link = dat_TA_pr%>% dplyr::select(iso3, cell5m, name_cntr, name_adm1) %>% +# group_by(name_adm1, iso3) %>% +# distinct_at(vars(name_adm1), .keep_all = T) %>% +# full_join(., AdminAdj2.2, by=c("name_adm1"="NAME1")) +# +# # I need to get the adm1 units unique per country otherwise generic terms will be merged (like no "administrative units") +# adm1_distinctAdj=AdminAdj2.2 %>% as_tibble() %>% +# group_by(NAME1, ADM0_NAME) %>% +# distinct_at(vars(NAME1), .keep_all = T) +# # str(Admin1) +# +# # test +# test1=test %>% as_tibble() %>% +# group_by(alpha, beta) %>% +# distinct_at(vars(beta), .keep_all = T) +# str(Admin1) +# +# adm1_distinct=as_tibble(Admin1) %>% +# distinct_at(vars(FIPS1))#, .keep_all = T) +# +# # adm2 units +# adm2o_distinct=as_tibble(Admin1) %>% +# distinct_at(vars(O_NAME))#, .keep_all = T) +# +# adm2_distinct=as_tibble(Admin1) %>% +# distinct_at(vars(FIPS1))#, .keep_all = T) +# +# # Validation - Making sure SPAM2010 matches with admin Shapefiles # Adm0 - unique countries # SPAM2010 @@ -169,31 +176,83 @@ join_SPAM_adm1 = st_join(Admin1, geo_spam1, join = st_intersects, largest = T) # 25 adm1 units needs to be added to the representative units. Otherwise, the total arable land will be off. TODO! # Disinct GGS adjusted -adm1_distinctAdj=Admin1Adj2 %>% as_tibble() %>% - distinct_at(vars(NAME1))#, .keep_all = T) +adm1_distinctAdj=AdminAdj2.2 %>% as_tibble() %>% + group_by(NAME1, ADM0_NAME) %>% + distinct_at(vars(NAME1), .keep_all = T) #2,741 rows # Distinct SPAM2010 spam_admin_link = dat_TA_pr %>% dplyr::select(iso3, cell5m, name_cntr, name_adm1) %>% - distinct_at(vars(name_adm1))#, .keep_all = T) %>% + group_by(name_adm1, iso3) %>% + distinct_at(vars(name_adm1), .keep_all = T) #2,493 (diff=248) - -# Left joining the SPAM2010 and GGS adjusted. THe result is not yet matching in terms of rows -FINAL_adm1_spam2010_GGS = dat_TA_pr %>% dplyr::select(iso3, cell5m, name_cntr, name_adm1) %>% - distinct_at(vars(name_adm1), .keep_all = T) #%>% - left_join(., adm1_distinctAdj, by=c("name_adm1" = "NAME1")) %>% - dplyr::select(iso3, cell5m, name_adm1, ADM1_CODE, FIPS1, FIPS0) #%>% - # drop_na() - -write_csv(FINAL_adm1_spam2010_GGS, here::here("Input_data", "test67.csv")) - - -# Difference between "FINAL_adm1_spam2010_GGS" (rows 2493 ) AND "spam_admin_link" (rows 2468). -# The difference of 25 rows is due to 8 adm1 district available in the SPAM2010 but not available in the GGS dataset. -# Further, spelling errors could be a reason as well. - - +# Left joining the SPAM2010 and GGS adjusted. +# This function's result is the final Shapefile that matches SPAM2010 adm1 +# (-17 that are not in the GGS but will be merged into other adm1 units) +FINAL_adm1_spam2010_GGS_distinct = dat_TA_pr %>% dplyr::select(iso3, cell5m, name_cntr, name_adm1) %>% + group_by(name_adm1, iso3) %>% + distinct_at(vars(name_adm1), .keep_all = T) %>% + left_join(., AdminAdj2.2, by=c("name_adm1" = "NAME1")) %>% + # dplyr::select(iso3, cell5m, name_adm1, ADM1_CODE, FIPS1, FIPS0) %>% + drop_na(ADM1_CODE) %>% + distinct_at(vars(name_adm1), .keep_all = T) + +# Converting to shapefile +ADM1_SPAM_GGS = st_as_sf(FINAL_adm1_spam2010_GGS_distinct) +proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below +proj_meter = "+datum=WGS84 +units=m +no_defs" +ADM1_SPAM_GGS <- st_as_sf(x = ADM1_SPAM_GGS, + # coords = c("x", "y"), + crs = proj) + +# Writing Adm1 zone files tabular and spatial +write_csv(FINAL_adm1_spam2010_GGS, here::here("Input_data", "Core_data", "ADM1_SPAM2010_GGS")) +st_write(ADM1_SPAM_GGS, here::here("Input_data", "Core_data", "ADM1_SPAM2010_GGS_geo.shp"), + driver = "ESRI Shapefile") + +# Mind the LATIN2 encoding when reading in the ADM1_SPAM_GGS +ADM1_SPAM_GGS = st_read(here::here("Input_data", "Core_data", "ADM1_SPAM2010_GGS_geo.shp"), options = "ENCODING=LATIN2") + + +# Validation questions: -------------------------------------------------- +# 1. What is the amount of adm1 units in the initial SPAM2010 data? +# SPAM2010/Adm1: Without grouping per country = 2,493 +# SPAM2010/ADm1: With grouping per country = 2,586 (-> Diff=93) + +# 2. How many adm1 units are not represented in SPAm2010 but are in GGS? +# with drop na: 2569 >> The amount of NAs in a left-join indicates which values in SPAM2010 are not available in GGS. +# without drop na: 5869 # Difference between so 2869-2569=300 units are not in SPAM that would actually be in GGS. +# The number seems very high but I saw also that these reperesent many tiny islands around the world. So we just +# work with the assumption that SPAM2010 considered which adm1 units were important for crop growth and which not. + +# 3. How many units are there in the SPAM2010 but not in GGS? +# 2586 (without drop_na) >> this indicates the df after left_join includin the na's indicating the adm1 units in SPAM but not in GGS +# 2569(with drop na) -> Difference between na and no-na is 17 -> These are the values that are in SPAM but +# 17 adm1 units are that are in SPAM2010 are not in GGS! + +# 4. What are the adm1 units that need merging? +# From the set of adm1 nits that are in SPAM but not in GGS we need to filter those who are combined together. +# Only then we can sum the correct area (both physical and harvested). + +# iso3 name_adm1 Merged with +# GBR North East (UK) East Midlands (UK) +# GBR North West (UK) East Midlands (UK) +# GBR Yorkshire and The Humber East Midlands (UK) +# GBR West Midlands (UK) East Midlands (UK) +# GBR East of England East Midlands (UK) +# GBR South East (UK) East Midlands (UK) +# GBR South West (UK) East Midlands (UK) +# GBR London East Midlands (UK) +# CZE Strednì Morava Strední Cechy +# MKD Kriva Palanka Kumanova +# HND Nor Oriental Gracias A Dios +# ECU Nororiente Sucumbios +# MWI Mzuzu Karonga +# MWI Salima Kasungu +# MWI Lilongwe Kasungu +# MWI Blantyre Machinga +# MWI Shire valley Machinga # With aggregation long ----------------------------------------------------- ChangeNames <- function(x) { ## Renaming function diff --git a/SPAM_Data_Transformation/SPAM_GSS_ADM1_ZONES.R b/SPAM_Data_Transformation/SPAM_GSS_ADM1_ZONES.R new file mode 100644 index 0000000000000000000000000000000000000000..92ad8b49c25cba280591a1a1ed9b10ef82cd1676 --- /dev/null +++ b/SPAM_Data_Transformation/SPAM_GSS_ADM1_ZONES.R @@ -0,0 +1,255 @@ +easypackages::packages(c("tidyverse", 'sf', 'raster','sp','dplyr','spatialEco', + 'exactextractr','tibble', 'here', "readr", + "readxl", "foreign", "data.table", "rbenchmark", "benchmarkme", + "validate", "plyr")) + +# Set working directory +setwd(here::here("Input_data")) + +# Load data ---------------------------------------------------------------- +# Yields ------------------------------------------------------------------ +# dat_TA_yld <- read.csv("spam2010V2r0_global_Y_TA.csv", locale = locale(encoding = "LATIN2"))# Yields - all technologies together, ie complete crop - Data downloaded on 1. of September 2020 +# dat_TI_yld <- read_csv("spam2010V2r0_global_Y_TI.csv", locale = locale(encoding = "LATIN2"))# Yields - Iirrigated portion of crop +# dat_TH_yld <- read_csv("spam2010V2r0_global_Y_TH.csv", locale = locale(encoding = "LATIN2"))# Yields - rainfed high inputs portion of crop +# dat_TL_yld <- read_csv("spam2010V2r0_global_Y_TL.csv", locale = locale(encoding = "LATIN2"))# Yields - rainfed low inputs portion of crop +# dat_TS_yld <- read_csv("spam2010V2r0_global_Y_TS.csv",locale = locale(encoding = "LATIN2"))# Yields - rainfed subsistence portion of crop +# dat_TR_yld <- read_csv("spam2010V2r0_global_Y_TR.csv", locale = locale(encoding = "LATIN2"))# Yields - rainfed portion of crop (= TA - TI, or TH + TL + TS) + +# Production +dat_TA_pr <- read_csv("spam2010V2r0_global_P_TA.csv",locale = locale(encoding = "LATIN2")) # LATIN2 encoding is used for proper display of nordic and slavic names +# dat_TI_pr <- read_csv("spam2010V2r0_global_P_TI.csv",locale = locale(encoding = "LATIN2")) +# dat_TH_pr <- read_csv("spam2010V2r0_global_P_TH.csv",locale = locale(encoding = "LATIN2")) +# dat_TL_pr <- read_csv("spam2010V2r0_global_P_TL.csv",locale = locale(encoding = "LATIN2")) +# dat_TS_pr <- read_csv("spam2010V2r0_global_P_TS.csv",locale = locale(encoding = "LATIN2")) +# dat_TR_pr <- read_csv("spam2010V2r0_global_P_TR.csv",locale = locale(encoding = "LATIN2")) +# +# # Physical area-------------------------------- +# dat_TA_pa <- read_csv("spam2010V2r0_global_A_TA.csv",locale = locale(encoding = "LATIN2")) +# dat_TI_pa <- read_csv("spam2010V2r0_global_A_TI.csv",locale = locale(encoding = "LATIN2")) +# dat_TH_pa <- read_csv("spam2010V2r0_global_A_TH.csv",locale = locale(encoding = "LATIN2")) +# dat_TL_pa <- read_csv("spam2010V2r0_global_A_TL.csv",locale = locale(encoding = "LATIN2")) +# dat_TS_pa <- read_csv("spam2010V2r0_global_A_TS.csv",locale = locale(encoding = "LATIN2")) +# dat_TR_pa <- read_csv("spam2010V2r0_global_A_TR.csv",locale = locale(encoding = "LATIN2")) +# +# # Harvesting area---------------------------------- +# dat_TA_ha <- read_csv("spam2010V2r0_global_H_TA.csv",locale = locale(encoding = "LATIN2")) +# dat_TI_ha <- read_csv("spam2010V2r0_global_H_TI.csv",locale = locale(encoding = "LATIN2")) +# dat_TH_ha <- read_csv("spam2010V2r0_global_H_TH.csv",locale = locale(encoding = "LATIN2")) +# dat_TL_ha <- read_csv("spam2010V2r0_global_H_TL.csv",locale = locale(encoding = "LATIN2")) +# dat_TS_ha <- read_csv("spam2010V2r0_global_H_TS.csv",locale = locale(encoding = "LATIN2")) +# dat_TR_ha <- read_csv("spam2010V2r0_global_H_TR.csv",locale = locale(encoding = "LATIN2")) + +test <- read_csv("Test_distinct.txt") + + +# Create a spatial SPAM file +# geo_spam=dplyr::select(dat_TA_pr, x,y, cell5m, name_adm1, iso3) +# %>% +# distinct_at(vars(name_adm1)) + +# Make SPAM spatial +# projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0" +# proj_mercator = "+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs" +proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below +proj_meter = "+datum=WGS84 +units=m +no_defs" +geo_spam1 <- st_as_sf(x = geo_spam, + coords = c("x", "y"), + crs = proj) + +# Load administrative unit shapefile which matches the SPAM adm1 zones +# Admin0 = st_read(here::here("Input_data","GAUL_GADM_SPAM_ULRIKE", "g2008_0_now","g2008_0.shp")) +# Admin1 = st_read(here::here("Input_data","GAUL_GADM_SPAM_ULRIKE", "g2008_1_now","g2008_1.shp")) #this is what we need +# Admin2 = st_read(here::here("Input_data","GAUL_GADM_SPAM_ULRIKE", "g2008_2_2020_11_02","g2008_2.shp")) #this is what we need + +# Here I manually adjusted the NAME1 values to the once used in SPAM2010. see Documentation for more. +Admin1Adj = st_read(here::here("Input_data","g2008_1_now", "g2008_1.shp"), options = "ENCODING=LATIN2")#WINDOWS-1252 + +# Now the values from GGS that are still not matching the SPAM2010 adm1 values are recoded +Admin1Adj2 = Admin1Adj %>% as_tibble() %>% + mutate(NAME1 = case_when(ADM0_CODE == 196 & #Philipines + NAME1 =="Autonomous region in Muslim Mindanao (ARMM)"~ "Autonomous region in Muslim Mi", + TRUE ~ NAME1)) %>% + mutate(NAME1 = case_when(ADM0_CODE == 196 & #Philipines + NAME1 =="Region IX (Zamboanga Peninsula)" ~ "Region IX (Zamboanga Peninsula", + TRUE ~ NAME1)) %>% + mutate(NAME1 = case_when(#ADM0_CODE == 19 & #Azerbaijan + NAME1 =="Administrative unit not available" ~ "Administrative unit not availa", + TRUE ~ NAME1)) %>% + mutate(NAME1 = case_when(#ADM0_CODE == 152 & #Malawi + NAME1 =="Area under National Administration" ~ "Area under National Administra", + TRUE ~ NAME1)) %>% + mutate(NAME1 = case_when(ADM0_CODE == 196 & #Philipines + NAME1 =="Cordillera Administrative region (CAR)" ~ "Cordillera Administrative regi", + TRUE ~ NAME1)) %>% + mutate(NAME1 = case_when(#ADM0_CODE == 196 & #Philipines + NAME1 =="Autonomous region in Muslim Mindanao (ARMM)"~ "Autonomous region in Muslim Mi", + TRUE ~ NAME1)) %>% + mutate(NAME1 = case_when(ADM0_CODE == 108 & #Haiti + NAME1 =="NOROESTE" ~ "Nord-Est", + TRUE ~ NAME1)) %>% + mutate(NAME1 = case_when(ADM0_CODE == 111 & # Honduras + NAME1 =="Nor Oriental" ~ "Gracias A Dios", + TRUE ~ NAME1)) + +# Define projection for GAULGADMSPAM +proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below +proj_meter = "+datum=WGS84 +units=m +no_defs" +AdminAdj2.2 <- st_as_sf(x = Admin1Adj2, + # coords = c("x", "y"), + crs = proj) + +# Joining SPAM df and GAUL-GADM-SPAM -------------------------------------- + +# Ulrike's comment: Shape files for GAUL-GADM-SPAM +# Please note that the columns fips0, fips1 and fips2 are the ones which have the codes for admin level 0 (country), level 1 and level 2 which can be linked to SPAM results. +# The matching names are in columns Adm0_name, name1 and name2.# +# Columns which have names O_fips1, O_fips2, O_name1 and O_name2 refer to an “old” classification system at level 1 and level 2. Its entries differ from the columns without “O_” in some EU countries and a few others where the statistic units did not match the admin units. Please note that the shape files themselves were not modified, only the entries in the dbf tables. + +# # adm1 unit match check +# spam_admin_link = dat_TA_pr%>% dplyr::select(iso3, cell5m, name_cntr, name_adm1) %>% +# group_by(name_adm1, iso3) %>% +# distinct_at(vars(name_adm1), .keep_all = T) %>% +# full_join(., AdminAdj2.2, by=c("name_adm1"="NAME1")) +# +# # I need to get the adm1 units unique per country otherwise generic terms will be merged (like no "administrative units") +# adm1_distinctAdj=AdminAdj2.2 %>% as_tibble() %>% +# group_by(NAME1, ADM0_NAME) %>% +# distinct_at(vars(NAME1), .keep_all = T) +# # str(Admin1) +# +# # test +# test1=test %>% as_tibble() %>% +# group_by(alpha, beta) %>% +# distinct_at(vars(beta), .keep_all = T) +# str(Admin1) +# +# adm1_distinct=as_tibble(Admin1) %>% +# distinct_at(vars(FIPS1))#, .keep_all = T) +# +# # adm2 units +# adm2o_distinct=as_tibble(Admin1) %>% +# distinct_at(vars(O_NAME))#, .keep_all = T) +# +# adm2_distinct=as_tibble(Admin1) %>% +# distinct_at(vars(FIPS1))#, .keep_all = T) +# +# # Validation - Making sure SPAM2010 matches with admin Shapefiles + +# Adm0 - unique countries +# SPAM2010 +# 201 (name_cntr) +# 201 (iso3) + +# ADM1 GAUL_SPAM_GADM +# > 275 (ADM0_NAME) +# > 250 (FIPS0) +# + +# ADM1 +# 1. Same amount of rows when distinct adm1 names? + +# SPAM2010 Admin1 units +# 2493 (name_adm1) + +# ADM1 GAUL_SPAM_GADM +# 3230 (NAME1) +# 3079 (O_NAME1 +# 2898 (FIPS1) + +# ADM2 GAUL_SPAM_GADM +# 3080 (O_NAME1) +# 2758 (NAME1) +# 3222 (O_FIPS1) +# 2898 (FIPS1) + + +# How to deal with inconsistency? ----------------------------------------- +# 1. Spatial join +# 2. Fulljoin > detect NA's then check what is different + +# Spatial join +join_SPAM_adm1 = st_join(Admin1, geo_spam1, join = st_intersects, largest = T) + +# the final adm1 units are calculated by first full join and then by removing the NAs (=all variables that are not +# in the SPAM2010 and some that are not represented in the GGS (no shapes available for England subunits for example)). +# Finally, we have now 2468 adm1 units globally (of initially 2493 adm1 units; Diff: 25 adm1 units). The area of these +# 25 adm1 units needs to be added to the representative units. Otherwise, the total arable land will be off. TODO! + +# Disinct GGS adjusted +adm1_distinctAdj=AdminAdj2.2 %>% as_tibble() %>% + group_by(NAME1, ADM0_NAME) %>% + distinct_at(vars(NAME1), .keep_all = T) +#2,741 rows + +# Distinct SPAM2010 +spam_admin_link = dat_TA_pr %>% dplyr::select(iso3, cell5m, name_cntr, name_adm1) %>% + group_by(name_adm1, iso3) %>% + distinct_at(vars(name_adm1), .keep_all = T) +#2,493 (diff=248) + +# Left joining the SPAM2010 and GGS adjusted. +# This function's result is the final Shapefile that matches SPAM2010 adm1 +# (-17 that are not in the GGS but will be merged into other adm1 units) +FINAL_adm1_spam2010_GGS_distinct = dat_TA_pr %>% dplyr::select(iso3, cell5m, name_cntr, name_adm1) %>% + group_by(name_adm1, iso3) %>% + distinct_at(vars(name_adm1), .keep_all = T) %>% + left_join(., AdminAdj2.2, by=c("name_adm1" = "NAME1")) %>% + # dplyr::select(iso3, cell5m, name_adm1, ADM1_CODE, FIPS1, FIPS0) %>% + drop_na(ADM1_CODE) %>% + distinct_at(vars(name_adm1), .keep_all = T) + +# Converting to shapefile +ADM1_SPAM_GGS = st_as_sf(FINAL_adm1_spam2010_GGS_distinct) +proj = "+proj=longlat +datum=WGS84 +no_defs +units=m" # taken from raster file below +proj_meter = "+datum=WGS84 +units=m +no_defs" +ADM1_SPAM_GGS <- st_as_sf(x = ADM1_SPAM_GGS, + # coords = c("x", "y"), + crs = proj) + +# Writing Adm1 zone files tabular and spatial +write_csv(FINAL_adm1_spam2010_GGS, here::here("Input_data", "Core_data", "ADM1_SPAM2010_GGS")) +st_write(ADM1_SPAM_GGS, here::here("Input_data", "Core_data", "ADM1_SPAM2010_GGS_geo.shp"), + driver = "ESRI Shapefile") + +# Mind the LATIN2 encoding when reading in the ADM1_SPAM_GGS +ADM1_SPAM_GGS = st_read(here::here("Input_data", "Core_data", "ADM1_SPAM2010_GGS_geo.shp"), options = "ENCODING=LATIN2") + + +# Validation questions: -------------------------------------------------- +# 1. What is the amount of adm1 units in the initial SPAM2010 data? +# SPAM2010/Adm1: Without grouping per country = 2,493 +# SPAM2010/ADm1: With grouping per country = 2,586 (-> Diff=93) + +# 2. How many adm1 units are not represented in SPAm2010 but are in GGS? +# with drop na: 2569 >> The amount of NAs in a left-join indicates which values in SPAM2010 are not available in GGS. +# without drop na: 5869 # Difference between so 2869-2569=300 units are not in SPAM that would actually be in GGS. +# The number seems very high but I saw also that these reperesent many tiny islands around the world. So we just +# work with the assumption that SPAM2010 considered which adm1 units were important for crop growth and which not. + +# 3. How many units are there in the SPAM2010 but not in GGS? +# 2586 (without drop_na) >> this indicates the df after left_join includin the na's indicating the adm1 units in SPAM but not in GGS +# 2569(with drop na) -> Difference between na and no-na is 17 -> These are the values that are in SPAM but +# 17 adm1 units are that are in SPAM2010 are not in GGS! + +# 4. What are the adm1 units that need merging? +# From the set of adm1 nits that are in SPAM but not in GGS we need to filter those who are combined together. +# Only then we can sum the correct area (both physical and harvested). + +# iso3 name_adm1 Merged with +# GBR North East (UK) East Midlands (UK) +# GBR North West (UK) East Midlands (UK) +# GBR Yorkshire and The Humber East Midlands (UK) +# GBR West Midlands (UK) East Midlands (UK) +# GBR East of England East Midlands (UK) +# GBR South East (UK) East Midlands (UK) +# GBR South West (UK) East Midlands (UK) +# GBR London East Midlands (UK) +# CZE Strednì Morava Strední Cechy +# MKD Kriva Palanka Kumanova +# HND Nor Oriental Gracias A Dios +# ECU Nororiente Sucumbios +# MWI Mzuzu Karonga +# MWI Salima Kasungu +# MWI Lilongwe Kasungu +# MWI Blantyre Machinga +# MWI Shire valley Machinga \ No newline at end of file diff --git a/Zones/Zones.R b/Zones/Zones.R index 6c04a52c0986c7886f06d90d7547837c6a76f52c..153a84c07acd5668234186ad99b4b65f799a14d5 100644 --- a/Zones/Zones.R +++ b/Zones/Zones.R @@ -27,3 +27,21 @@ SPAM_GADM_JOIN_GIS = st_join(dat_cent, shape, join = st_within) #https://postgis head(SPAM_GADM_JOIN_GIS) class(SPAM_GADM_Join) +# Importing Zones with GAEZ from Spam ------------------------------------- + +zones=read_sf(here("Input_data","ARC_Pro_Input", "Zones_gaul2008_SpamID_AEZcode.shp")) +projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0" +zone1 <- st_as_sf(x = zones, + coords = c("x", "y"), + crs = projcrs) + +zone2=zone1 %>% dplyr::select(AREA, G2008_1_ID, ADM1_CODE, ADM0_NAME, + ADM1_NAME, CONTINENT, REGION, AEZ, iso3, cell5m, + alloc_key, name_cntr, name_adm1, name_adm1, + name_adm2, Shape_Area, geometry) %>% + group_by(ADM0_NAME, AEZ) %>% + summarize() + +plot(zone2) + +