Skip to content
Snippets Groups Projects
Commit 2f8ea59d authored by Koehorst, Jasper
Browse files

rdf manager progress

parent 4980adb2
No related branches found
No related tags found
No related merge requests found
......@@ -79,6 +79,7 @@ dependencies {
// HDT: clone the hdt-java git repository and run a Maven install to make the hdt library available
implementation group: 'org.rdfhdt', name: 'hdt-java-core', version: '2.1.3-SNAPSHOT'
implementation group: 'org.rdfhdt', name: 'hdt-jena', version: '2.1.3-SNAPSHOT'
compile "org.slf4j:slf4j-api:1.7.30"
compile "log4j:log4j:1.2.17"
......
......@@ -146,7 +146,7 @@ public class Download {
IRODSGenQueryBuilder queryBuilder = new IRODSGenQueryBuilder(true, null);
// Generalised to landingzone folder, check later if it's ENA or Project
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_DATA_NAME, QueryConditionOperators.LIKE, "%.ttl");
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_DATA_NAME, QueryConditionOperators.LIKE, "%.hdt");
// Skip files found in trash
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.NOT_LIKE, "/" + connection.irodsAccount.getZone() + "/trash/%");
// Find files in project and investigation
......@@ -218,8 +218,10 @@ public class Download {
throw new JargonException("File " + irodsFile + " does not exist");
}
if (irodsFile.isHidden()) {
logger.info("File is hidden");
return;
}
logger.info("Downloading " + irodsFile);
DataTransferOperations dataTransferOperationsAO = connection.irodsFileSystem.getIRODSAccessObjectFactory().getDataTransferOperations(connection.irodsAccount);
......@@ -230,8 +232,10 @@ public class Download {
File localFile = new File("." + download);
// TODO remove entry
if (localFile.exists())
if (localFile.exists()) {
logger.info("File already exists");
return;
}
if (localFile.exists()) {
// Perform hash check!
......
package nl.munlock.irods;
import nl.munlock.graphdb.options.CommandOptions;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.riot.Lang;
import org.apache.jena.riot.RDFDataMgr;
import org.apache.log4j.Logger;
import org.eclipse.rdf4j.repository.Repository;
import org.eclipse.rdf4j.repository.RepositoryConnection;
import org.eclipse.rdf4j.repository.manager.RemoteRepositoryManager;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFParseException;
import org.eclipse.rdf4j.rio.Rio;
import org.irods.jargon.core.exception.JargonException;
import org.rdfhdt.hdt.hdt.HDT;
import org.rdfhdt.hdt.hdt.HDTManager;
import org.rdfhdt.hdtjena.HDTGraph;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import static nl.munlock.irods.Download.downloadFile;
......@@ -39,30 +46,42 @@ public class Load {
Connection connection = new Connection();
for (String rdfFile : rdfFiles.keySet()) {
if (rdfFile.contains(".fail")) continue;
downloadFile(connection, new File(rdfFile));
InputStream inputStream = new FileInputStream("." + rdfFile);
try {
Rio.parse(inputStream, commandOptions.base, RDFFormat.TURTLE);
inputStream.close();
} catch (RDFParseException e) {
logger.info("Failed " + rdfFile);
continue;
} catch (IOException e) {
System.err.println("NO IDEA " + e.getMessage());
}
logger.info("Loading " + rdfFile);
inputStream = new FileInputStream("." + rdfFile);
repositoryConnection.add(inputStream, commandOptions.base, RDFFormat.TURTLE);
// inputStream.close();
// Add triples
downloadFile(connection, new File(rdfFile));
logger.info("Converting to NT file");
HDT hdt = HDTManager.mapIndexedHDT("." + rdfFile);
HDTGraph graph = new HDTGraph(hdt);
Model model = ModelFactory.createModelForGraph(graph);
GZIPOutputStream outputStream = new GZIPOutputStream(new FileOutputStream("." + rdfFile + ".nt.gz"));
RDFDataMgr.write(outputStream, model, Lang.NTRIPLES) ;
outputStream.close();
// Open the stream
GZIPInputStream inputStream = new GZIPInputStream(new FileInputStream("." + rdfFile + ".nt.gz"));
// Send to the database
repositoryConnection.add(inputStream, commandOptions.base, RDFFormat.NTRIPLES);
// Add triples of loaded files
String statement = "<" + rdfFiles.get(rdfFile) + "> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://m-unlock.nl/ontology/RDFDataSet> .";
InputStream stream = new ByteArrayInputStream(statement.getBytes(StandardCharsets.UTF_8));
repositoryConnection.add(stream, null, RDFFormat.NTRIPLES);
// try {
// Rio.parse(inputStream, commandOptions.base, RDFFormat.TURTLE);
// inputStream.close();
// } catch (RDFParseException e) {
// logger.info("Failed " + rdfFile);
// continue;
// } catch (IOException e) {
// System.err.println("NO IDEA " + e.getMessage());
// }
//
// logger.info("Loading " + rdfFile);
// inputStream = new FileInputStream("." + rdfFile);
// repositoryConnection.add(inputStream, commandOptions.base, RDFFormat.TURTLE);
// inputStream.close();
}
// Shutdown connection, repository and manager
......
......@@ -58,7 +58,7 @@ public class AppTest {
public void mibmock(){
String[] args = {
"-project", "P_MIB-Amplicon",
"-investigation", "I_Mocks",
"-investigation", "I_Poultry_16S_MIB",
"-graphdb", "https://www.systemsbiology.nl/whatever/",
"-username", "admin",
"-password", "root"
......
0% Loading… Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment