Commit 23706450 authored by Jasper Koehorst
Browse files

ncbi bioprojects initiation project

parent fc6c1d6a
Pipeline #37167 passed with stage
in 2 minutes and 43 seconds
......@@ -3,3 +3,4 @@
/.gradle/
/target/
/unlock/
json
......@@ -85,6 +85,8 @@ dependencies {
// Download button functionality
implementation 'org.vaadin.stefan:lazy-download-button:1.0.0'
implementation 'org.json:json:20180130'
}
dependencyManagement {
......
package nl.fairbydesign.backend.data.objects;
public class DiskUsage {
private String identifier;
private String project;
private String investigation;
private long size;
public void setIdentifier(String identifier) {
this.identifier = identifier;
public void setProject(String project) {
this.project = project;
}
public String getIdentifier() {
return identifier;
public String getProject() {
return project;
}
public void setSize(long size) {
......@@ -19,4 +20,12 @@ public class DiskUsage {
public long getSize() {
return size;
}
public String getInvestigation() {
return investigation;
}
public void setInvestigation(String investigation) {
this.investigation = investigation;
}
}
......@@ -4,6 +4,7 @@ import nl.fairbydesign.backend.data.objects.Biom;
import nl.fairbydesign.backend.data.objects.DiskUsage;
import nl.fairbydesign.backend.data.objects.Process;
import org.apache.commons.lang3.StringUtils;
import org.apache.tomcat.jni.Proc;
import org.irods.jargon.core.checksum.ChecksumValue;
import org.irods.jargon.core.checksum.LocalChecksumComputerFactory;
import org.irods.jargon.core.checksum.LocalChecksumComputerFactoryImpl;
......@@ -19,29 +20,44 @@ import org.irods.jargon.core.pub.io.IRODSFile;
import org.irods.jargon.core.pub.io.IRODSFileFactory;
import org.irods.jargon.core.query.*;
import org.jboss.logging.Logger;
import org.jermontology.ontology.JERMOntology.domain.process;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.lang.reflect.Array;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.*;
import static org.irods.jargon.core.query.QueryConditionOperators.LIKE;
import static org.irods.jargon.core.query.QueryConditionOperators.NOT_LIKE;
import static org.irods.jargon.core.query.RodsGenQueryEnum.*;
public class Data {
public static final Logger logger = Logger.getLogger(Data.class);
/**
* Obtains Project, investigation and study information combined with yaml status and workflow
* @param irodsAccount the irods account for the query
* @return
*/
public static ArrayList<Process> getPIS(IRODSAccount irodsAccount) {
try {
// Obtain all projects via metadata
IRODSGenQueryBuilder queryBuilder = new IRODSGenQueryBuilder(true, null);
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_META_COLL_ATTR_NAME, QueryConditionOperators.EQUAL, "type");
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_META_COLL_ATTR_VALUE, QueryConditionOperators.EQUAL, "Study");
queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_COLL_NAME);
// iquest --no-page "%s %s %s" "SELECT COLL_NAME, META_DATA_ATTR_VALUE, META_DATA_ATTR_UNITS WHERE
// COLL_NAME NOT LIKE '/unlock/references%'
// AND
// COLL_NAME NOT LIKE '/unlock/trash%' AND
// DATA_NAME LIKE '%.yaml'"
queryBuilder.addConditionAsGenQueryField(COL_COLL_NAME, LIKE, "/unlock/projects%");
queryBuilder.addConditionAsGenQueryField(COL_DATA_NAME, LIKE, "%.yaml");
queryBuilder.addSelectAsGenQueryValue(COL_COLL_NAME);
queryBuilder.addSelectAsGenQueryValue(COL_META_DATA_ATTR_VALUE);
queryBuilder.addSelectAsGenQueryValue(COL_META_DATA_ATTR_UNITS);
// Set limit?
IRODSGenQueryFromBuilder query = queryBuilder.exportIRODSQueryFromBuilder(999999);
......@@ -52,27 +68,60 @@ public class Data {
IRODSQueryResultSet irodsQueryResultSet = irodsGenQueryExecutor.executeIRODSQuery(query, 0);
List<IRODSQueryResultRow> irodsQueryResultSetResults = irodsQueryResultSet.getResults();
ArrayList<Process> proccesses = new ArrayList<>();
// unlock [0]
// projects [1]
// project [2]
// investigation [3]
// study [4]
HashMap<String, Process> processHashMap = new HashMap<>();
for (IRODSQueryResultRow irodsQueryResultRow : irodsQueryResultSetResults) {
Process process = new Process();
String studyPath = irodsQueryResultRow.getColumn(0);
String studyIdentifier = new File(studyPath).getName();
String investigationIdentifier = new File(studyPath).getParentFile().getName();
String projectIdentifier = new File(studyPath).getParentFile().getParentFile().getName();
process.setProjectIdentifier(projectIdentifier);
process.setInvestigationIdentifier(investigationIdentifier);
process.setStudyIdentifier(studyIdentifier);
process.setPath(studyPath);
Collection<Process> processCollection = getJobInformation(irodsAccount, process);
proccesses.addAll(processCollection);
String workflow = irodsQueryResultRow.getColumn(1);
String unit = irodsQueryResultRow.getColumn(2);
String first = irodsQueryResultRow.getColumn(0).split("/")[1];
String second = irodsQueryResultRow.getColumn(0).split("/")[2];
String project = irodsQueryResultRow.getColumn(0).split("/")[3];
String investigation = irodsQueryResultRow.getColumn(0).split("/")[4];
String study = irodsQueryResultRow.getColumn(0).split("/")[5];
String path = "/" + first + "/" + second + "/" + project + "/" + investigation +"/"+study;
// For this path with this workflow
String id = path + " " + workflow;
Process process;
if (processHashMap.containsKey(id)) {
process = processHashMap.get(id);
} else {
process = new Process();
process.setProjectIdentifier(project);
process.setInvestigationIdentifier(investigation);
process.setStudyIdentifier(study);
process.setPath(path);
process.setWorkflow(workflow);
processHashMap.put(id, process);
}
// Status
if (unit.equals("waiting")) {
process.setWaiting(process.getWaiting() + 1);
} else if (unit.equals("running")) {
process.setRunning(process.getRunning() + 1);
} else if (unit.equals("finished")) {
process.setFinished(process.getFinished() +1);
} else if (unit.equals("failed")) {
logger.error(process.getPath() +" " + workflow +" " + unit);
process.setFailed(process.getFailed() + 1);
} else if (unit.equals("queue")) {
process.setQueue(process.getQueue() + 1);
} else {
logger.error("Status unknown" + unit);
}
}
return proccesses;
} catch (JargonException e) {
e.printStackTrace();
} catch (JargonQueryException e) {
e.printStackTrace();
} catch (GenQueryBuilderException e) {
return new ArrayList(processHashMap.values());
} catch (JargonException | JargonQueryException | GenQueryBuilderException e) {
e.printStackTrace();
}
return new ArrayList<>();
......@@ -83,7 +132,7 @@ public class Data {
// Obtain all projects via metadata
IRODSGenQueryBuilder queryBuilder = new IRODSGenQueryBuilder(true, null);
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.LIKE, process.getPath() + "%");
queryBuilder.addConditionAsGenQueryField(COL_COLL_NAME, QueryConditionOperators.LIKE, process.getPath() + "%");
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_DATA_NAME, QueryConditionOperators.LIKE, "%.yaml");
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_META_DATA_ATTR_NAME, QueryConditionOperators.EQUAL, "cwl");
......@@ -146,7 +195,7 @@ public class Data {
// Obtain all disk usage for each project
IRODSGenQueryBuilder queryBuilder = new IRODSGenQueryBuilder(true, null);
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.LIKE, "/" + irodsAccount.getZone() + "/projects/" + process.getProjectIdentifier() + "%");
queryBuilder.addConditionAsGenQueryField(COL_COLL_NAME, QueryConditionOperators.LIKE, "/" + irodsAccount.getZone() + "/projects/" + process.getProjectIdentifier() + "/" + process.getInvestigationIdentifier() + "%");
// Column name as default does a distinct?
queryBuilder.addSelectAsAgregateGenQueryValue(RodsGenQueryEnum.COL_DATA_SIZE, GenQueryField.SelectFieldTypes.SUM);
......@@ -161,7 +210,8 @@ public class Data {
List<IRODSQueryResultRow> irodsQueryResultSetResults = irodsQueryResultSet.getResults();
DiskUsage diskUsage = new DiskUsage();
diskUsage.setIdentifier(process.getProjectIdentifier());
diskUsage.setProject(process.getProjectIdentifier());
diskUsage.setInvestigation(process.getInvestigationIdentifier());
for (IRODSQueryResultRow irodsQueryResultRow : irodsQueryResultSetResults) {
String size = irodsQueryResultRow.getColumn(0).trim().strip();
if (StringUtils.isNumeric(size) && size.length() > 0) {
......@@ -283,7 +333,7 @@ public class Data {
IRODSGenQueryBuilder queryBuilder = new IRODSGenQueryBuilder(true, null);
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.LIKE, trashFolder.getAbsolutePath() + "%");
queryBuilder.addConditionAsGenQueryField(COL_COLL_NAME, QueryConditionOperators.LIKE, trashFolder.getAbsolutePath() + "%");
queryBuilder.addSelectAsAgregateGenQueryValue(RodsGenQueryEnum.COL_DATA_SIZE, GenQueryField.SelectFieldTypes.SUM);
// Set limit?
......@@ -313,9 +363,9 @@ public class Data {
// Obtain all projects via metadata
IRODSGenQueryBuilder queryBuilder = new IRODSGenQueryBuilder(true, null);
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.LIKE, search);
queryBuilder.addConditionAsGenQueryField(COL_COLL_NAME, QueryConditionOperators.LIKE, search);
queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_COLL_NAME);
queryBuilder.addSelectAsGenQueryValue(COL_COLL_NAME);
// Set limit?
IRODSGenQueryFromBuilder query = queryBuilder.exportIRODSQueryFromBuilder(999999);
......@@ -376,7 +426,7 @@ public class Data {
// Obtain all projects via metadata
IRODSGenQueryBuilder queryBuilder = new IRODSGenQueryBuilder(true, null);
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_DATA_SIZE, QueryConditionOperators.NUMERIC_GREATER_THAN_OR_EQUAL_TO, Long.parseLong(megabytes) * 1024 * 1024);
queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_COLL_NAME);
queryBuilder.addSelectAsGenQueryValue(COL_COLL_NAME);
queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_DATA_NAME);
queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_DATA_SIZE);
queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_D_MODIFY_TIME);
......@@ -406,7 +456,7 @@ public class Data {
queryBuilder = new IRODSGenQueryBuilder(true, null);
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_DATA_SIZE, QueryConditionOperators.NUMERIC_GREATER_THAN_OR_EQUAL_TO, Long.parseLong(megabytes) * 1024 * 1024);
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_META_DATA_ATTR_NAME, QueryConditionOperators.EQUAL, "RESOURCE");
queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_COLL_NAME);
queryBuilder.addSelectAsGenQueryValue(COL_COLL_NAME);
queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_DATA_NAME);
queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_DATA_SIZE);
queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_META_DATA_ATTR_VALUE);
......
package nl.fairbydesign.backend.ncbi;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.MapperFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import nl.fairbydesign.backend.ncbi.objects.esearch.Root;
import nl.fairbydesign.backend.parsers.ExcelGenerator;
import org.apache.jena.sparql.function.library.print;
import org.apache.poi.ddf.EscherTertiaryOptRecord;
import org.jboss.logging.Logger;
import org.json.JSONObject;
import org.json.XML;
import org.junit.Test;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Scanner;
import static nl.fairbydesign.backend.ncbi.XLSX.setup;
import static nl.fairbydesign.backend.ncbi.XLSX.studies;
public class NCBI {
public static final Logger logger = Logger.getLogger(ExcelGenerator.class);
public static HashSet<nl.fairbydesign.backend.ncbi.objects.esample.Root> roots = new HashSet<nl.fairbydesign.backend.ncbi.objects.esample.Root>();
@Test
public void fetch() throws IOException {
// Folder structure
String rootFolder = "./json/";
new File(rootFolder).mkdirs();
// String[] terms = {"PRJNA4", "PRJNA342081", "PRJNA342082", "PRJNA342083", "PRJNA342085", "PRJNA636830", "PRJNA637186", "PRJNA640606", "PRJNA641267", "PRJNA644470", "PRJNA644472", "PRJNA644946", "PRJNA692806", "PRJNA723175", "PRJNA726329", "PRJNA796221", "PRJNA796733", "PRJNA797524", "PRJNA797710", "PRJNA797893", "PRJNA798161", "PRJNA587532", "PRJNA623491", "PRJNA623492", "PRJNA623493", "PRJNA623494", "PRJNA673740", "PRJNA768342", "PRJNA768343", "PRJNA768621", "PRJNA788700", "PRJNA792431", "PRJNA793339", "PRJNA793432", "PRJNA797187", "PRJNA797358", "PRJNA797692", "PRJNA797723", "PRJNA797901", "PRJNA797911", "PRJNA797938", "PRJNA798206", "PRJNA799654", "PRJNA799658", "PRJNA799665", "PRJNA799446", "PRJNA799448", "PRJNA799450", "PRJNA799453", "PRJNA799461", "PRJNA799470", "PRJNA799474", "PRJNA600082", "PRJNA600109", "PRJNA600335", "PRJNA600366", "PRJNA600548", "PRJNA600549", "PRJNA600560", "PRJNA600580", "PRJNA600584", "PRJNA600588", "PRJNA600590", "PRJNA600612", "PRJNA600613", "PRJNA600614", "PRJNA600615", "PRJNA600616", "PRJNA600618", "PRJNA600712", "PRJNA600724", "PRJNA600725", "PRJNA600726", "PRJNA600727", "PRJNA600728", "PRJNA600731", "PRJNA600732", "PRJNA600733", "PRJNA600734", "PRJNA640475", "PRJNA686140", "PRJNA686142", "PRJNA686144", "PRJNA686145", "PRJNA727731", "PRJNA771178", "PRJNA784705", "PRJNA784707", "PRJNA784714", "PRJNA794000", "PRJNA799136", "PRJNA799145", "PRJNA799150", "PRJNA799154", "PRJNA799156", "PRJNA799162", "PRJNA799168", "PRJNA799170", "PRJNA799171", "PRJNA799174", "PRJNA799175", "PRJNA799177", "PRJNA799178", "PRJNA799179", "PRJNA799181", "PRJNA799182", "PRJNA799183", "PRJNA799186", "PRJNA799189", "PRJNA799193", "PRJNA799198", "PRJNA799204", "PRJNA799205", "PRJNA799208", "PRJNA799209", "PRJNA799210", "PRJNA799211", "PRJNA799212", "PRJNA799213", "PRJNA799215", "PRJNA799216", "PRJNA799217", "PRJNA799218", "PRJNA799219", "PRJNA799222", "PRJNA799223", "PRJNA799224", "PRJNA799225", "PRJNA799226", "PRJNA799227", "PRJNA799228", "PRJNA799229", 
"PRJNA799235", "PRJNA799236", "PRJNA799238", "PRJNA799242", "PRJNA799243", "PRJNA799246", "PRJNA799251", "PRJNA799254", "PRJNA799262", "PRJNA799266", "PRJNA799267", "PRJNA799299", "PRJNA799302", "PRJNA799304", "PRJNA799312", "PRJNA799313", "PRJNA799322", "PRJNA799324", "PRJNA799330", "PRJNA799331", "PRJNA799333", "PRJNA799337", "PRJNA799338", "PRJNA799341", "PRJNA799347", "PRJNA799354", "PRJNA799357", "PRJNA799358", "PRJNA799359", "PRJNA799361", "PRJNA799362", "PRJNA799363", "PRJNA799367", "PRJNA799373", "PRJNA799374", "PRJNA799375", "PRJNA799378", "PRJNA799381", "PRJNA799390", "PRJNA799391", "PRJNA799396", "PRJNA799399", "PRJNA799400", "PRJNA799401", "PRJNA799402", "PRJNA799438", "PRJNA799440", "PRJNA506732", "PRJNA673593", "PRJNA698757", "PRJNA715509", "PRJNA741251", "PRJNA765537", "PRJNA766865", "PRJNA777023", "PRJNA783781", "PRJNA784295", "PRJNA785128", "PRJNA785354", "PRJNA785356"};
Scanner scanner = new Scanner(new File("bioprojects.txt"));
while (scanner.hasNextLine()) {
String term = scanner.nextLine().strip();
System.err.println(term);
File path = makePath(rootFolder + "/search/", term);
File outputFile = new File(path +"/"+ term + ".json");
System.err.println(outputFile);
// if (1==1) continue;
// Download the json sample/data file
if (!outputFile.exists()) {
String jsonString = search("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=sra&term=" + term + "&retmax=3000000&tool=fairds&email=fairds%40unlock.nl");
PrintWriter printWriter = new PrintWriter(outputFile);
printWriter.println(jsonString);
printWriter.close();
}
String jsonString = Files.readString(outputFile.toPath());
// TODO multiple type mapper for IdList, string or list of integers
if (jsonString.contains("IdList\": \"\"")) {
logger.info("Nothing found for " + term);
continue;
} else {
logger.info("Obtaining results for " + term);
}
// JSON string mapper to object with several configuration options
ObjectMapper objectMapper = new ObjectMapper();
objectMapper.configure(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY, true);
objectMapper.configure(MapperFeature.ACCEPT_CASE_INSENSITIVE_PROPERTIES, true);
objectMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);
objectMapper.setSerializationInclusion(JsonInclude.Include.NON_EMPTY);
// https://json2csharp.com/json-to-pojo to generate the source code
// PrintWriter printWriterX = new PrintWriter("bla.json");
// printWriterX.println(jsonString);
// printWriterX.close();
Root root = objectMapper.readValue(jsonString, Root.class);
ArrayList<String> ids = root.getESearchResult().getIdList().getId();
for (String id : ids) {
// System.err.println(ids.indexOf(id) + " " + root.getESearchResult().getIdList().getId().size());
path = makePath(rootFolder, id);
outputFile = new File(path + "/" + id + ".json");
// Download the json sample/data file
if (!outputFile.exists()) {
jsonString = search("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=sra&id=" + id + "&rettype=xml&retmode=text&tool=fairds&email=fairds%40unlock.nl");
PrintWriter printWriter = new PrintWriter(outputFile);
printWriter.println(jsonString);
printWriter.close();
}
jsonString = Files.readString(outputFile.toPath());
// For validation purposes
try {
nl.fairbydesign.backend.ncbi.objects.esample.Root sampleRoot = objectMapper.readValue(jsonString, nl.fairbydesign.backend.ncbi.objects.esample.Root.class);
// roots.add(sampleRoot);
} catch (IOException e) {
e.printStackTrace();
PrintWriter printWriter = new PrintWriter("failed.json");
printWriter.println(jsonString);
printWriter.close();
return;
}
}
}
setup(roots);
}
private File makePath(String rootFolder, String id) {
StringBuilder rootFolderBuilder = new StringBuilder(rootFolder);
int length = Integer.parseInt(String.valueOf(id.toCharArray().length / 4.0).split("\\.")[0]);
for (int i = 0; i < id.toCharArray().length; i++) {
if (i % 4 == 0 && i > 0) {
rootFolderBuilder.append("/");
new File(rootFolderBuilder.toString()).mkdirs();
return new File(rootFolderBuilder.toString());
}
rootFolderBuilder.append(id.toCharArray()[i]);
}
new File(rootFolderBuilder.toString()).mkdirs();
return new File(rootFolderBuilder.toString());
// new File(rootFolderBuilder.toString()).mkdirs();
// return new File(rootFolderBuilder.toString());
}
private String search(String urlString) throws IOException {
URL url = new URL(urlString);
HttpURLConnection http = (HttpURLConnection) url.openConnection();
http.setRequestProperty("Accept", "*/*");
// System.out.println(http.getResponseCode() + " " + http.getResponseMessage());
InputStream inputStream = http.getInputStream();
String contents = new String(inputStream.readAllBytes(), StandardCharsets.UTF_8);
// System.err.println(contents);
// Convert to json object as the json mapper works better
JSONObject json = XML.toJSONObject(contents);
String jsonString = json.toString(4);
http.disconnect();
return jsonString;
}
}
package nl.fairbydesign.backend.ncbi;
import nl.fairbydesign.backend.ncbi.objects.esample.EXPERIMENTPACKAGE;
import nl.fairbydesign.backend.ncbi.objects.esample.Root;
import nl.fairbydesign.backend.ncbi.objects.esample.STUDY;
import nl.fairbydesign.backend.ncbi.objects.sheets.Study;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFFont;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.jboss.logging.Logger;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import static nl.fairbydesign.backend.parsers.ExcelGenerator.OBSERVATION_UNIT_DESCRIPTION;
import static nl.fairbydesign.backend.parsers.ExcelGenerator.OBSERVATION_UNIT_NAME;
import static nl.fairbydesign.backend.parsers.ExcelValidator.*;
public class XLSX {
public static CellStyle headerStyle;
public static final int defaultCellWidth = 25;
public static CellStyle obligatoryStyle;
public static final Logger logger = Logger.getLogger(XLSX.class);
private static XSSFWorkbook workbook;
private static HashSet<Root> roots;
// List of studies
static HashSet<Study> studies = new HashSet<>();
/**
 * Creates a new workbook, fills the project, investigation, study and observation unit
 * sheets and writes the result to {@code TEST.xlsx} in the working directory.
 *
 * <p>Sample and assay sheets are not generated yet. A failure to write the file is logged
 * and not propagated to the caller.
 *
 * @param rootEntries sample objects from NCBI
 */
public static void setup(HashSet<Root> rootEntries) {
    roots = rootEntries;
    // Create Excel file
    workbook = new XSSFWorkbook();
    makeHeaderCellStyle();
    makeObligatoryHeaderCellStyle();
    // Create project
    createProject();
    // Create investigation
    createInvestigation();
    // Create study
    createStudy();
    // Create observation unit
    createObservationUnit();
    // Create sample
    // Create assay
    // Write excel file; both the workbook and the stream are closed even when writing fails
    File filename = new File("TEST" + ".xlsx");
    try (XSSFWorkbook book = workbook; FileOutputStream fileOut = new FileOutputStream(filename)) {
        book.write(fileOut);
    } catch (IOException e) {
        logger.error("Unable to write " + filename, e);
    }
}
/**
 * Adds the "ObservationUnit" sheet: a styled header row followed by one row for every
 * distinct sample accession found in the NCBI records.
 *
 * <p>Identifier, description and name are filled with the placeholder {@code xxxx}; only the
 * study identifier is taken from the record.
 */
private static void createObservationUnit() {
    // Core headers
    ArrayList<String> headerList = new ArrayList<>(List.of(new String[]{OBSERVATION_UNIT_IDENTIFIER, OBSERVATION_UNIT_DESCRIPTION, OBSERVATION_UNIT_NAME, STUDY_IDENTIFIER}));
    // Other headers?

    // The shared helper writes each header into its own column and applies the header style,
    // which keeps this sheet identical in layout to the other sheets
    Sheet observationUnitSheet = createSheet(headerList, "ObservationUnit");

    // Build up the environment for each document
    HashSet<String> accessions = new HashSet<>();
    for (Root root : roots) {
        EXPERIMENTPACKAGE experimentPackage = root.getEXPERIMENT_PACKAGE_SET().getEXPERIMENT_PACKAGE();
        String accession = "X" + experimentPackage.getSAMPLE().getAccession();
        if (!accessions.contains(accession)) {
            // Row 0 is the header, so the n-th distinct accession is written to row n
            Row observationUnitSheetRow = observationUnitSheet.createRow(accessions.size() + 1);
            // Set OBSERVATION_UNIT_IDENTIFIER
            observationUnitSheetRow.createCell(headerList.indexOf(OBSERVATION_UNIT_IDENTIFIER)).setCellValue("xxxx");
            observationUnitSheetRow.createCell(headerList.indexOf(OBSERVATION_UNIT_DESCRIPTION)).setCellValue("xxxx");
            observationUnitSheetRow.createCell(headerList.indexOf(OBSERVATION_UNIT_NAME)).setCellValue("xxxx");
            String studyAccession = experimentPackage.getSTUDY().getAccession();
            observationUnitSheetRow.createCell(headerList.indexOf(STUDY_IDENTIFIER)).setCellValue(studyAccession);
            accessions.add(accession);
        }
    }
}
/**
 * Adds the "Study" sheet with one row per distinct study accession. The investigation
 * column is filled with the fixed text {@code INVESTIGATION_ID}.
 */
private static void createStudy() {
    ArrayList<String> studyHeaders = new ArrayList<>(List.of(STUDY_IDENTIFIER, STUDY_DESCRIPTION, STUDY_TITLE, INVESTIGATION_IDENTIFIER));
    Sheet sheet = createSheet(studyHeaders, "Study");

    HashSet<String> seen = new HashSet<>();
    for (Root entry : roots) {
        STUDY study = entry.getEXPERIMENT_PACKAGE_SET().getEXPERIMENT_PACKAGE().getSTUDY();
        if (seen.contains(study.getAccession())) {
            // Every study is written only once
            continue;
        }
        logger.info("Creating study " + study.getAccession());
        // Row 0 holds the header
        Row row = sheet.createRow(seen.size() + 1);
        row.createCell(0).setCellValue(study.getAccession());
        row.createCell(1).setCellValue(study.getDESCRIPTOR().getSTUDY_ABSTRACT());
        row.createCell(2).setCellValue(study.getDESCRIPTOR().getSTUDY_TITLE());
        row.createCell(3).setCellValue("INVESTIGATION_ID");
        seen.add(study.getAccession());
    }
}
/**
 * Placeholder for the investigation sheet; the body is empty, so no sheet is created.
 */
private static void createInvestigation() {
}
/**
 * Adds the "Project" sheet containing only the header row.
 */
private static void createProject() {
    // Core project headers
    ArrayList<String> projectHeaders = new ArrayList<>();
    projectHeaders.add(PROJECT_IDENTIFIER);
    projectHeaders.add(PROJECT_DESCRIPTION);
    projectHeaders.add(PROJECT_TITLE);
    createSheet(projectHeaders, "Project");
}
/**
 * Creates a sheet in the current workbook with the default column width and a styled
 * header row.
 *
 * @param headerList column titles, written left to right into row 0
 * @param sheetName  name of the new sheet
 * @return the newly created sheet
 */
private static Sheet createSheet(ArrayList<String> headerList, String sheetName) {
    Sheet created = workbook.createSheet(sheetName);
    created.setDefaultColumnWidth(defaultCellWidth);

    // First row carries the column titles
    Row header = created.createRow(0);
    int column = 0;
    for (String title : headerList) {
        header.createCell(column).setCellValue(title);
        column++;
    }

    // Apply the shared header style to every title cell
    for (Cell titleCell : header) {
        titleCell.setCellStyle(headerStyle);
    }
    return created;
}
/**
* To create the header identical in all sheets
*/
public static void makeHeaderCellStyle() {
headerStyle = workbook.createCellStyle();
headerStyle.setFillForegroundColor(IndexedColors.LIGHT_CORNFLOWER_BLUE.getIndex());
headerStyle.setFillPattern(FillPatternType