Use gsub and the Mac Unicode-escaped character

56bf3551 · Languillaume, Antoine · bc325087 · 5e3e2365 · 56bf3551 · 56bf3551
Commit 56bf3551 authored 4 years ago by Languillaume, Antoine
--- a/1_data_cleaning.R
+++ b/1_data_cleaning.R
@@ -51,7 +51,10 @@ d$yield[id_NA] <- NA
 d$fertilizer <- stringi::stri_escape_unicode(d$fertilizer)
 # replace the unicode representation by "e" yielding "Efficiencie"
-d$fertilizer <- str_replace(d$fertilizer, "\\\\u00c3\\\\u0192\\\\u00c2\\\\u00ab", "e")
+#d$fertilizer <- str_replace(d$fertilizer, "\\\\u00c3\\\\u00ab\\\\u00eb", "e")
+## changed to gsub, which should also work on Mac
+d$fertilizer <- gsub("\\\\u00eb", "e",d$fertilizer)
 ## Convert yield in ton per ha (tha),
 # here I use dplyr::mutate as an example

--- a/Master_Script.R
+++ b/Master_Script.R
 ##install all required packages
 # Install required packages if necessary
@@ -184,7 +183,15 @@ units[contains.unit]=unlist(lapply(colnames[contains.unit],function(x) x[2]))
 colnames=unlist(lapply(colnames,function(x) x[1]))
-new.frame= t1[(head.row+1):nrow(t1),1:length(colnames)]
+### now re-read the file starting at the first data row (so that columns get the proper data type)
+if(grepl(".xlsx",wb)){
+new.frame <-read.xlsx(file,detectDates=T,sheet=st, colNames=F, startRow=(head.row+1))
+}
+if(grepl(".csv",wb)){
+new.frame<-read.csv(file, header =F,as.is =T, skip=(head.row))
+}
 colnames(new.frame)= colnames
@@ -207,9 +214,9 @@ var.frame=data.frame("workbook"= wb.name,"sheet"= sheet.names,"variable"= var.na
 #mid.names=c("ID","Country","Name region","Name site","Minimum latitude","Maximum latitude", "Minimum longitude","Maximum longitude","Experiment/survey","Type of experiment","Type of survey","On-farm/on-station","Crops","Animals","Soil type")
 #meta.data.frame=read.xlsx(paste(path.data.op,"Required_Metadata.xlsx",sep="/"))
-field.vec=c("Data ID", "Official title of the dataset", "Project name", "Description of project", "Author", "Author ID(ORCID)", "Contributor(s)", "Subject matter of research/Vocabulary", "Data origin", "Funder(s) or sponsor(s) of project", "creation date (m/d/yyyy)", "Embargo end date", "Citation", "keywords (AGROVOC)", "Country(ies) covered", "Point longitude coord. in Dec. Degrees ", "Agro-Ecological Zone(s)(FAO) covered", "Years covered by data", "Crops covered by data", "Animals covered by data", "Start date of data collection ", "End date of data collection ", "License (default=CC-BY)", "Permission given by email", "Rights", "Contact email")
+field.vec=c("Data ID", "Official title of the dataset", "Project name", "Description of project", "Author", "Author ID(ORCID)", "Contributor(s)", "Subject matter of research/Vocabulary", "Data origin", "Funder(s) or sponsor(s) of project", "creation date (m/d/yyyy)", "Embargo end date", "Citation", "keywords (AGROVOC)", "Country(ies) covered", "Point geographic coordinates in Dec. Degrees ", "Agro-Ecological Zone(s)(FAO) covered", "Years covered by data", "Crops covered by data", "Animals covered by data", "Start date of data collection ", "End date of data collection ", "License (default=CC-BY)", "Permission given by email", "Rights", "Contact email")
-field_name.vec=c("data.id", "data.title", "project.name", "project.description", "author", "orcid", "contributors", "subject.research", "data.origin", "donor", "date.creation", "date.embargo", "citation", "keywords.agrovoc", "countries", "longitude", "aez", "years", "crops", "animals", "date.collect.start", "date.collect.end", "licence", "permission", "rights", "contact.mail")
+field_name.vec=c("data.id", "data.title", "project.name", "project.description", "author", "orcid", "contributors", "subject.research", "data.origin", "donor", "date.creation", "date.embargo", "citation", "keywords.agrovoc", "countries", "longitude.latitude", "aez", "years", "crops", "animals", "date.collect.start", "date.collect.end", "licence", "permission", "rights", "contact.mail")
 meta.data.frame=data.frame(field= field.vec,field_name= field_name.vec,values=NA, stringsAsFactors = F)