diff --git a/src/main/java/nl/munlock/graphdb/App.java b/src/main/java/nl/munlock/graphdb/App.java index bfe509a523c07c6285b510b16dbe27cacbbbc8c6..f5bc0c51ca202c443847b8b53e79939d71d32b2e 100644 --- a/src/main/java/nl/munlock/graphdb/App.java +++ b/src/main/java/nl/munlock/graphdb/App.java @@ -29,16 +29,18 @@ public class App { if (arguments.contains("-graphdb")) { logger.info("Graphdb parsing"); CommandOptions commandOptions = new CommandOptions(args); - + // Create repository CreateRemoteRepository.main(commandOptions); // Change to HDT... HashMap<String, String> hdtFiles = Download.findHDTFiles(commandOptions); Load.hdt(commandOptions, hdtFiles); + // Load investigation file + Load.rdf(commandOptions); } else if (arguments.contains("-hdt")) { logger.info("HDT parsing"); // Check if hdt is in args CommandOptionsHDT commandOptions = new CommandOptionsHDT(args); - HashMap<String, String> rdfFiles = Download.findHDTFiles(commandOptions.folder); + HashMap<String, String> rdfFiles = Download.findRDFFiles(commandOptions.folder + "%"); if (rdfFiles.size() > 0) { nl.munlock.hdt.Create.start(commandOptions, rdfFiles); } else { diff --git a/src/main/java/nl/munlock/irods/Download.java b/src/main/java/nl/munlock/irods/Download.java index 2ad0a79609e76395eb6fe1d97dbefff2ec47a7f0..3546f4cd94ee5df1cff569a4279ac3f4aae97265 100644 --- a/src/main/java/nl/munlock/irods/Download.java +++ b/src/main/java/nl/munlock/irods/Download.java @@ -88,7 +88,7 @@ public class Download { return paths; } - public static HashMap<String, String> findHDTFiles(String searchPath) throws GenQueryBuilderException, JargonException { + public static HashMap<String, String> findRDFFiles(String searchPath) throws GenQueryBuilderException, JargonException { logger.info("Searching for RDF files in " + searchPath); Connection connection = new Connection(); @@ -106,7 +106,7 @@ public class Download { queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.NOT_LIKE, "/" + connection.irodsAccount.getZone() + "/trash/%"); queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.NOT_LIKE, "/" + connection.irodsAccount.getZone() + "%/hdt"); // Find files in project and investigation - queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.LIKE, searchPath + "%"); + queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.LIKE, searchPath); queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_COLL_NAME); queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_DATA_NAME); queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_D_DATA_CHECKSUM); diff --git a/src/main/java/nl/munlock/irods/Load.java b/src/main/java/nl/munlock/irods/Load.java index 9554281bc5e8a1c7972600080e790ad9f0e7d29d..9d20073979b3a8e55ee22d7fdb5e9276cc3b84f3 100644 --- a/src/main/java/nl/munlock/irods/Load.java +++ b/src/main/java/nl/munlock/irods/Load.java @@ -18,11 +18,15 @@ import org.eclipse.rdf4j.repository.event.base.RepositoryConnectionListenerAdapt import org.eclipse.rdf4j.repository.manager.RemoteRepositoryManager; import org.eclipse.rdf4j.rio.RDFFormat; import org.irods.jargon.core.exception.JargonException; +import org.irods.jargon.core.pub.io.IRODSFile; +import org.irods.jargon.core.query.GenQueryBuilderException; import org.rdfhdt.hdt.hdt.HDT; import org.rdfhdt.hdt.hdt.HDTManager; import org.rdfhdt.hdtjena.HDTGraph; import java.io.*; +import java.net.MalformedURLException; +import java.net.URL; import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.HashSet; @@ -36,22 +40,12 @@ public class Load { static Logger logger = Logger.getLogger(Load.class); private static RepositoryConnection repositoryConnection; private static Repository repository; + private static RemoteRepositoryManager remoteRepositoryManager; public static void hdt(CommandOptions commandOptions, HashMap<String, String> hdtFiles) throws IOException, JargonException { logger.info("Loading RDF files"); - String strServerUrl = commandOptions.graphdb; - - RemoteRepositoryManager remoteRepositoryManager = RemoteRepositoryManager.getInstance(strServerUrl); - remoteRepositoryManager.setUsernameAndPassword(commandOptions.username, commandOptions.password); - remoteRepositoryManager.init(); - - // Get the repository from repository manager, note the repository id - // set in configuration .ttl file - repository = remoteRepositoryManager.getRepository(commandOptions.project + "_" + commandOptions.investigation); - - // Open a connection to this repository - repositoryConnection = repository.getConnection(); + makeConnection(commandOptions); // Obtain all hashes IRI predicate = iri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"); @@ -134,7 +128,7 @@ public class Load { // Iterate over all output subset files for (File tripleFileSubset : localTripleFileSubsets) { // Sent to database - logger.info("Loading " + tripleFileSubset.getName() + " into " + strServerUrl); + logger.info("Loading " + tripleFileSubset.getName() + " into " + commandOptions.graphdb); // Generate hash of file GZIPInputStream inputStream = new GZIPInputStream(new FileInputStream(tripleFileSubset)); String sha256 = "sha256:"+org.apache.commons.codec.digest.DigestUtils.sha256Hex(inputStream); @@ -160,6 +154,21 @@ public class Load { remoteRepositoryManager.shutDown(); } + private static void makeConnection(CommandOptions commandOptions) { + String strServerUrl = commandOptions.graphdb; + + remoteRepositoryManager = RemoteRepositoryManager.getInstance(strServerUrl); + remoteRepositoryManager.setUsernameAndPassword(commandOptions.username, commandOptions.password); + remoteRepositoryManager.init(); + + // Get the repository from repository manager, note the repository id + // set in configuration .ttl file + repository = remoteRepositoryManager.getRepository(commandOptions.project + "_" + commandOptions.investigation); + + // Open a connection to this repository + repositoryConnection = repository.getConnection(); + } + public static void loadZippedFile(InputStream in, RDFFormat format) throws IOException { NotifyingRepositoryConnectionWrapper con = new NotifyingRepositoryConnectionWrapper(repository, repository.getConnection()); RepositoryConnectionListenerAdapter myListener = @@ -188,4 +197,22 @@ public class Load { } } } + + public static void rdf(CommandOptions commandOptions) throws JargonException, IOException { + Connection connection = new Connection(); + IRODSFile irodsFile = connection.fileFactory.instanceIRODSFile("/" + connection.irodsAccount.getZone() + "/projects/" + commandOptions.project + "/" + commandOptions.investigation); + logger.info("Searching for turtle files in " + irodsFile); + + makeConnection(commandOptions); + + for (File file : irodsFile.listFiles()) { + if (file.isHidden()) continue; + + if (file.getName().endsWith(".ttl")) { + downloadFile(connection, new File(file.getAbsolutePath())); + FileInputStream inputStream = new FileInputStream("." + file.getAbsolutePath()); + repositoryConnection.add(inputStream, null, RDFFormat.TURTLE); + } + } + } }