A utility for managing documents stored in a PostgreSQL database. The documents are imported into a
PostgreSQL DB as full texts with the goal to be able to retrieve the documents by their PubMedID efficiently.
@@ -21,7 +21,7 @@
- de.julielab.xmlData.cli.CLI
+ de.julielab.costosys.cli.CLI
diff --git a/src/main/java/de/julielab/costosys/Constants.java b/src/main/java/de/julielab/costosys/Constants.java
new file mode 100644
index 0000000..27827bd
--- /dev/null
+++ b/src/main/java/de/julielab/costosys/Constants.java
@@ -0,0 +1,116 @@
+/**
+ * Constants.java
+ *
+ * Copyright (c) 2010, JULIE Lab.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Common Public License v1.0
+ *
+ * Author: faessler
+ *
+ * Current version: 1.0
+ * Since version: 1.0
+ *
+ * Creation date: 19.11.2010
+ **/
+
+package de.julielab.costosys;
+
+/**
+ * This class provides Constants useful for common tasks. Examples include
+ * database field names for the import or retrieval of Medline documents, table
+ * names etc.
+ *
+ * @author faessler
+ */
+public final class Constants {
+
+ // Field attribute names
+
+ /**
+ * The default PostgreSQL schema in which all data related tables are
+ * stored. The schema is {@value #DEFAULT_DATA_SCHEMA}.
+ */
+ public static final String DEFAULT_DATA_SCHEMA = "_data";
+
+ /**
+ * Constant for the name of a database table holding at least document ID
+ * and document data (e.g. PubmedId and Medline XML). Value:
+ * {@value #DEFAULT_DATA_TABLE_NAME}.
+ */
+ public static final String DEFAULT_DATA_TABLE_NAME = DEFAULT_DATA_SCHEMA
+ + "._data";
+
+ public static final String VALUE = "value";
+
+ // SQL type constants
+
+ public static final String TYPE_TEXT = "text";
+
+ public static final String TYPE_TEXT_ARRAY = "text[]";
+
+ public static final String TYPE_VARCHAR_ARRAY = "varchar[]";
+
+ public static final String TYPE_BINARY_DATA = "bytea";
+
+ /**
+ * Constant for a possible value of type.
+ *
+ * Used to create a timestamp without time zone.
+ */
+ public static final String TYPE_TIMESTAMP_WITHOUT_TIMEZONE = "timestamp without time zone";
+
+ public static final String TYPE_INTEGER = "integer";
+
+ public static final String TYPE_BOOLEAN = "boolean";
+
+ public static final String TYPE_XML = "xml";
+
+ public static final String XML_FIELD_NAME = "xml";
+
+ public static final String PMID_FIELD_NAME = "pmid";
+
+ public static final String DATE_FIELD_NAME = "date";
+
+ public static final String NLM_ID_FIELD_NAME = "nlm_id";
+
+ public static final String AUTO_ID_FIELD_NAME = "autoID";
+
+ public static final String HAS_ERRORS = "has_errors";
+
+ public static final String LOG = "log";
+
+ public static final String IN_PROCESS = "is_in_process";
+
+ public static final String IS_PROCESSED = "is_processed";
+
+ public static final String LAST_COMPONENT = "last_component";
+
+ public static final String HOST_NAME = "host_name";
+
+ public static final String PROCESSING_TIMESTAMP = "processing_timestamp";
+
+ public static final String PID = "pid";
+
+ @Deprecated
+ public static final String DOC_ID_FIELD_NAME = "doc_id";
+
+ public static final String PROCESSED = "is_processed";
+
+ public static final String HIDDEN_CONFIG_PATH = "dbcTest.hiddenConfigPath";
+
+ public static final String COSTOSYS_CONFIG_FILE = "costosys.configurationfile";
+
+ public static final String MIRROR_COLLECTION_NAME = "public._mirrorSubsets";
+
+ public static final String MIRROR_COLUMN_DATA_TABLE_NAME = "datatablename";
+
+ public static final String MIRROR_COLUMN_SUBSET_NAME = "subsettablename";
+
+ public static final String MIRROR_COLUMN_DO_RESET = "performreset";
+
+ public static final String TIMESTAMP_FIELD_NAME = "timestamp";
+
+ public static final String TOTAL = "total";
+
+
+}
diff --git a/src/main/java/de/julielab/costosys/cli/CLI.java b/src/main/java/de/julielab/costosys/cli/CLI.java
new file mode 100644
index 0000000..122cb52
--- /dev/null
+++ b/src/main/java/de/julielab/costosys/cli/CLI.java
@@ -0,0 +1,1063 @@
+/**
+ * QueryCLI.java
+ *
+ * Copyright (c) 2010, JULIE Lab.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Common Public License v1.0
+ *
+ * Author: faessler
+ *
+ * Current version: 1.0
+ * Since version: 1.0
+ *
+ * Creation date: 20.11.2010
+ **/
+
+package de.julielab.costosys.cli;
+
+import de.julielab.costosys.Constants;
+import de.julielab.costosys.dbconnection.SubsetStatus;
+import de.julielab.costosys.medline.ConfigurationConstants;
+import de.julielab.costosys.medline.Updater;
+import de.julielab.xml.JulieXMLConstants;
+import de.julielab.xml.JulieXMLTools;
+import de.julielab.costosys.configuration.TableSchemaDoesNotExistException;
+import de.julielab.costosys.dbconnection.CoStoSysConnection;
+import de.julielab.costosys.dbconnection.DataBaseConnector;
+import org.apache.commons.cli.*;
+import org.apache.commons.configuration2.XMLConfiguration;
+import org.apache.commons.configuration2.builder.FileBasedConfigurationBuilder;
+import org.apache.commons.configuration2.builder.fluent.Parameters;
+import org.apache.commons.configuration2.ex.ConfigurationException;
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.*;
+import java.sql.SQLException;
+import java.util.*;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import java.util.stream.Stream;
+
+import static de.julielab.costosys.dbconnection.DataBaseConnector.StatusElement.*;
+
+/**
+ * Command line interface for interaction with a database holding e.g. Medline
+ * XML data.
+ *
+ * @author faessler / hellrich
+ */
+public class CLI {
+
+ private final static String DELIMITER = "\n--------------------------------------------------------------------------------\n";
+
+ private static final Logger LOG = LoggerFactory.getLogger(CLI.class);
+ private static final String KEY_PART_SEPERATOR = "\t";
+ private static final String FILE_SEPERATOR = System.getProperty("file.separator");
+ public static String[] USER_SCHEME_DEFINITION = new String[]{"dbcconfiguration.xml", "costosys.xml", "costosysconfiguration.xml"};
+ private static boolean verbose = false;
+
+ private static void logMessage(String msg) {
+ if (!verbose)
+ return;
+ LOG.info(msg);
+ }
+
+ public static void main(String[] args) throws Exception {
+ long time = System.currentTimeMillis();
+ String dbUrl;
+ String user;
+ String password;
+ String dbName;
+ String serverName;
+ String pgSchema;
+ String msg;
+ boolean updateMode = false;
+
+ boolean error = false;
+ Mode mode = Mode.ERROR;
+
+ Options options = getOptions();
+
+ // What has to be done
+ CommandLineParser parser = new DefaultParser();
+ CommandLine cmd = null;
+ try {
+ cmd = parser.parse(options, args);
+ } catch (ParseException e) {
+ LOG.error("Can't parse arguments: " + e.getMessage());
+ printHelp(options);
+ System.exit(1);
+ }
+
+ verbose = cmd.hasOption('v');
+ if (verbose)
+ LOG.info("Verbose logging enabled.");
+
+ // selecting the mode
+ if (cmd.hasOption("h"))
+ error = true; // To show help
+ if (cmd.hasOption("i"))
+ mode = Mode.IMPORT;
+ if (cmd.hasOption("u")) {
+ mode = Mode.IMPORT;
+ updateMode = true;
+ }
+ if (cmd.hasOption("q"))
+ mode = Mode.QUERY;
+ if (cmd.getOptionValue("s") != null)
+ mode = Mode.SUBSET;
+ if (cmd.getOptionValue("re") != null)
+ mode = Mode.RESET;
+ if (cmd.getOptionValue("st") != null)
+ mode = Mode.STATUS;
+ if (cmd.hasOption("t"))
+ mode = Mode.TABLES;
+ if (cmd.hasOption("lts"))
+ mode = Mode.LIST_TABLE_SCHEMAS;
+ if (cmd.hasOption("td"))
+ mode = Mode.TABLE_DEFINITION;
+ if (cmd.hasOption("sch"))
+ mode = Mode.SCHEME;
+ if (cmd.hasOption("ch"))
+ mode = Mode.CHECK;
+ if (cmd.hasOption("dc"))
+ mode = Mode.DEFAULT_CONFIG;
+ if (cmd.hasOption("dt"))
+ mode = Mode.DROP_TABLE;
+ if (cmd.hasOption("um"))
+ mode = Mode.UPDATE_MEDLINE;
+
+ // authentication
+ // configuration file
+ String dbcConfigPath = null;
+ if (cmd.hasOption("dbc"))
+ dbcConfigPath = cmd.getOptionValue("dbc");
+ if (dbcConfigPath == null)
+ dbcConfigPath = findConfigurationFile();
+ File conf = new File(dbcConfigPath);
+ dbUrl = cmd.getOptionValue('U');
+ if (dbUrl == null) {
+ msg = "No database URL given. Using value in configuration file";
+ logMessage(msg);
+ }
+ user = cmd.getOptionValue("n");
+ if (user == null) {
+ msg = "No database username given. Using value in configuration file";
+ logMessage(msg);
+ }
+ password = cmd.getOptionValue("p");
+ if (password == null) {
+ msg = "No password given. Using value in configuration file";
+ logMessage(msg);
+ }
+ serverName = cmd.getOptionValue("srv");
+ dbName = cmd.getOptionValue("db");
+ pgSchema = cmd.getOptionValue("pgs");
+ if (!((serverName != null && dbName != null) ^ dbUrl != null)
+ && !(serverName == null && dbName == null && dbUrl == null) && !conf.exists()) {
+ LOG.error(
+ "No base configuration has been found. Thus, you must specify server name and database name or the complete URL with -u (but not both).");
+ System.exit(1);
+ }
+
+ DataBaseConnector dbc = null;
+ try {
+ if (conf.exists()) {
+ logMessage(String.format("Using configuration file at %s", conf));
+ if (dbUrl == null)
+ dbc = new DataBaseConnector(serverName, dbName, user, password, pgSchema,
+ new FileInputStream(conf));
+ else
+ dbc = new DataBaseConnector(dbUrl, user, password, pgSchema, new FileInputStream(conf));
+ } else {
+ logMessage(String.format(
+ "No custom configuration found (should be located at %s). Using default configuration.",
+ Stream.of(USER_SCHEME_DEFINITION).collect(Collectors.joining(" or "))));
+ if (dbUrl == null)
+ dbc = new DataBaseConnector(serverName, dbName, user, password, pgSchema, null);
+ else
+ dbc = new DataBaseConnector(dbUrl, user, password, pgSchema, null);
+ }
+ } catch (FileNotFoundException e) {
+ e.printStackTrace();
+ }
+
+ // all those options...
+ String tableName = cmd.getOptionValue("td");
+ if (tableName == null)
+ tableName = cmd.getOptionValue("ch");
+
+ String subsetTableName = cmd.getOptionValue("s");
+ if (subsetTableName == null)
+ subsetTableName = cmd.getOptionValue("re");
+ if (subsetTableName == null)
+ subsetTableName = cmd.getOptionValue("renp");
+ if (subsetTableName == null)
+ subsetTableName = cmd.getOptionValue("st");
+
+ String fileStr = cmd.getOptionValue("f");
+ if (fileStr == null)
+ fileStr = cmd.getOptionValue("i");
+ if (fileStr == null)
+ fileStr = cmd.getOptionValue("u");
+ if (cmd.hasOption("im")) {
+ mode = Mode.IMPORT;
+ // For some reason, multiple versions of some documents have been found in the baseline in the past.
+ // Just use the update mode.
+ XMLConfiguration importConfig = loadXmlConfiguration(new File(cmd.getOptionValue("im")));
+ fileStr = importConfig.getString(ConfigurationConstants.INSERTION_INPUT);
+ updateMode = true;
+ }
+
+ String superTableName = cmd.getOptionValue("z");
+ if (superTableName == null)
+ superTableName = dbc.getActiveDataTable();
+
+ String queryStr = cmd.getOptionValue("q");
+ String subsetJournalFileName = cmd.getOptionValue("j");
+ String subsetQuery = cmd.getOptionValue("o");
+ String randomSubsetSize = cmd.getOptionValue("r");
+ String whereClause = cmd.getOptionValue("w");
+ String xpath = cmd.getOptionValue("x");
+ String baseOutDir = cmd.getOptionValue("out");
+ String batchSize = cmd.getOptionValue("bs");
+ String limit = cmd.getOptionValue("l");
+ String tableSchema = cmd.getOptionValue("ts") != null ? cmd.getOptionValue("ts") : dbc.getActiveTableSchema();
+ boolean useDelimiter = baseOutDir != null ? false : cmd.hasOption("d");
+ boolean returnPubmedArticleSet = cmd.hasOption("pas");
+ boolean mirrorSubset = cmd.hasOption("m");
+ boolean all4Subset = cmd.hasOption("a");
+ Integer numberRefHops = cmd.hasOption("rh") ? Integer.parseInt(cmd.getOptionValue("rh")) : null;
+
+ if (tableSchema.matches("[0-9]+")) {
+ tableSchema = dbc.getConfig().getTableSchemaNames().get(Integer.parseInt(tableSchema));
+ }
+
+ try (CoStoSysConnection conn = dbc.obtainOrReserveConnection()) {
+ switch (mode) {
+ case QUERY:
+ QueryOptions qo = new QueryOptions();
+ qo.fileStr = fileStr;
+ qo.queryStr = queryStr;
+ qo.useDelimiter = useDelimiter;
+ qo.pubmedArticleSet = returnPubmedArticleSet;
+ qo.xpath = xpath;
+ qo.baseOutDirStr = baseOutDir;
+ qo.batchSizeStr = batchSize;
+ qo.limitStr = limit;
+ qo.tableName = superTableName;
+ qo.tableSchema = tableSchema;
+ qo.whereClause = whereClause;
+ qo.numberRefHops = numberRefHops;
+ error = doQuery(dbc, qo);
+ break;
+
+ case IMPORT:
+ error = doImportOrUpdate(dbc, fileStr, queryStr, superTableName, updateMode);
+ break;
+
+ case SUBSET:
+ error = doSubset(dbc, subsetTableName, fileStr, queryStr, superTableName, subsetJournalFileName,
+ subsetQuery, mirrorSubset, whereClause, all4Subset, randomSubsetSize, numberRefHops);
+ break;
+
+ case RESET:
+ if (subsetTableName == null) {
+ LOG.error("You must provide the name of the subset table to reset.");
+ error = true;
+ } else {
+ boolean files = cmd.hasOption("f");
+ try {
+ if (!files || StringUtils.isBlank(fileStr)) {
+ boolean np = cmd.hasOption("np");
+ boolean ne = cmd.hasOption("ne");
+ String lc = cmd.hasOption("lc") ? cmd.getOptionValue("lc") : null;
+ if (np)
+ logMessage("table reset is restricted to non-processed table rows");
+ if (ne)
+ logMessage("table reset is restricted to table row without errors");
+ if (lc != null)
+ logMessage("table reset is restricted to rows with last component " + lc);
+ if (!np && !ne && lc == null) {
+ SubsetStatus status = dbc.status(subsetTableName, EnumSet.of(IN_PROCESS, IS_PROCESSED, TOTAL));
+ long inProcess = status.inProcess;
+ long isProcessed = status.isProcessed;
+ long total = status.total;
+ // We don't bother with too small datasets, worst
+ // case would be to do it again for 10000 docs which
+ // is not much.
+ if (total > 10000 && inProcess + isProcessed >= total / 2) {
+ String input = getYesNoAnswer("The subset table \"" + subsetTableName
+ + "\" is in process or already processed over 50%."
+ + " Do you really wish to reset it completely into an unprocessed state? (yes/no)");
+ if (input.equals("no"))
+ System.exit(0);
+ }
+ }
+ dbc.resetSubset(subsetTableName, np, ne, lc);
+ } else {
+ logMessage("Resetting all documents identified by the IDs in file \"" + fileStr + "\".");
+ try {
+ List pkValues = asListOfArrays(fileStr);
+ dbc.resetSubset(subsetTableName, pkValues);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ } catch (TableNotFoundException e) {
+ e.printStackTrace();
+ }
+ }
+ break;
+ case STATUS:
+ error = doStatus(dbc,
+ subsetTableName,
+ cmd.hasOption("he"),
+ cmd.hasOption("isp"),
+ cmd.hasOption("inp"),
+ cmd.hasOption("to"),
+ cmd.hasOption("lc"));
+ break;
+
+ case TABLES:
+ for (String s : dbc.getTables())
+ System.out.println(s);
+ break;
+
+ case TABLE_DEFINITION:
+ for (String s : dbc.getTableDefinition(tableName))
+ System.out.println(s);
+ break;
+
+ case LIST_TABLE_SCHEMAS:
+ System.out.println("The following table schema names are contained in the current configuration:\n");
+ List tableSchemaNames = dbc.getConfig().getTableSchemaNames();
+ IntStream.range(0, tableSchemaNames.size()).mapToObj(i -> i + " " + tableSchemaNames.get(i))
+ .forEach(System.out::println);
+ break;
+
+ case SCHEME:
+ System.out.println(dbc.getScheme());
+ break;
+
+ case CHECK:
+ dbc.checkTableDefinition(tableName);
+ break;
+
+ case DEFAULT_CONFIG:
+ System.out.println(new String(dbc.getEffectiveConfiguration()));
+ break;
+
+ case DROP_TABLE:
+ dropTableInteractively(dbc, cmd.getOptionValue("dt"));
+ break;
+
+ case UPDATE_MEDLINE:
+ Updater updater = new Updater(loadXmlConfiguration(new File(cmd.getOptionValue("um"))));
+ updater.process(dbc);
+ break;
+
+ case ERROR:
+ break;
+ }
+ }
+
+ if (error) {
+ // printHelp(options);
+ System.exit(1);
+ }
+
+ time = System.currentTimeMillis() - time;
+ LOG.info(String.format("Processing took %d seconds.", time / 1000));
+ }
+
+ public static String findConfigurationFile() throws ConfigurationNotFoundException {
+ String configFileProperty = System.getProperty(Constants.COSTOSYS_CONFIG_FILE);
+ if (configFileProperty != null && new File(configFileProperty).exists())
+ return configFileProperty;
+ File workingDirectory = new File(".");
+ Set possibleConfigFileNames = new HashSet<>(Arrays.asList(USER_SCHEME_DEFINITION));
+ for (String file : workingDirectory.list()) {
+ if (possibleConfigFileNames.contains(file.toLowerCase()))
+ return file;
+ }
+ throw new ConfigurationNotFoundException("No configuration file with a name in " + Arrays.toString(USER_SCHEME_DEFINITION) + " was found in the current working directory " + new File(".").getAbsolutePath());
+ }
+
+ private static void dropTableInteractively(DataBaseConnector dbc, String tableName) {
+ try {
+ if (!dbc.tableExists(tableName)) {
+ if (tableName.contains("."))
+ System.err
+ .println("Table \"" + tableName + "\" does not exist in database " + dbc.getDbURL() + ".");
+ else
+ System.err.println("Table \"" + tableName + "\" does not exist in database " + dbc.getDbURL()
+ + " in active schema " + dbc.getActivePGSchema() + ".");
+ return;
+ } else {
+ String unqualifiedTableName = tableName.contains(".") ? tableName.substring(tableName.indexOf(".") + 1)
+ : tableName;
+ String schema = tableName.contains(".") ? tableName.substring(0, tableName.indexOf("."))
+ : dbc.getActivePGSchema();
+ System.out.println("Found table \"" + unqualifiedTableName + "\" in schema " + schema + " in database "
+ + dbc.getDbURL() + ". Do you really want to drop it (y/n)?");
+ BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
+ String response = in.readLine().toLowerCase();
+ while (!response.equals("y") && !response.equals("yes") && !response.equals("n")
+ && !response.equals("no")) {
+ System.out.println("Please specify y(es) or n(o).");
+ response = in.readLine().toLowerCase();
+ }
+ if (response.startsWith("y")) {
+ System.out.println("Dropping table \"" + unqualifiedTableName + "\" in Postgres schema \"" + schema
+ + "\" of database " + dbc.getDbURL());
+ dbc.dropTable(String.join(".", schema, unqualifiedTableName));
+ } else {
+ System.out.println("User canceled. Aborting process.");
+ }
+ }
+ } catch (IOException | SQLException e) {
+ e.printStackTrace();
+ }
+ }
+
+ /**
+ * Poses a question to the user and waits for a yes or
+ * no answer and returns it.
+ *
+ * @param question the question raised
+ * @return the answer yes or no
+ */
+ private static String getYesNoAnswer(String question) {
+ BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
+ String input = "";
+ try {
+ while (!input.equals("yes") && !input.equals("no")) {
+ System.out.println(question);
+ input = br.readLine();
+ }
+ } catch (IOException e) {
+ LOG.error("Something went wrong while reading from STDIN: ", e);
+ System.exit(1);
+ }
+ return input;
+ }
+
+ private static boolean doStatus(DataBaseConnector dbc, String subsetTableName, boolean showHasErrors, boolean showIsProcessed, boolean showIsInProcess, boolean showTotal, boolean showLastComponent) {
+ boolean error = false;
+ try {
+ if (subsetTableName == null) {
+ LOG.error("You must provide the name of a subset table to display its status.");
+ error = true;
+ } else {
+ EnumSet modes = EnumSet.noneOf(DataBaseConnector.StatusElement.class);
+ if (showHasErrors) modes.add(DataBaseConnector.StatusElement.HAS_ERRORS);
+ if (showIsProcessed) modes.add(DataBaseConnector.StatusElement.IS_PROCESSED);
+ if (showIsInProcess) modes.add(DataBaseConnector.StatusElement.IN_PROCESS);
+ if (showTotal) modes.add(DataBaseConnector.StatusElement.TOTAL);
+ if (showLastComponent) modes.add(DataBaseConnector.StatusElement.LAST_COMPONENT);
+ if (modes.isEmpty())
+ modes = EnumSet.allOf(DataBaseConnector.StatusElement.class);
+
+ try (CoStoSysConnection ignored = dbc.obtainOrReserveConnection()) {
+ SubsetStatus status = dbc.status(subsetTableName, modes);
+ System.out.println(status);
+ }
+ }
+ } catch (TableSchemaDoesNotExistException e) {
+ LOG.error(e.getMessage());
+ error = true;
+ } catch (TableNotFoundException e) {
+ LOG.error(e.getMessage());
+ e.printStackTrace();
+ }
+ return error;
+ }
+
+ private static boolean doSubset(DataBaseConnector dbc, String subsetTableName, String fileStr, String queryStr,
+ String superTableName, String subsetJournalFileName, String subsetQuery, boolean mirrorSubset,
+ String whereClause, boolean all4Subset, String randomSubsetSize, Integer numberRefHops)
+ throws SQLException {
+ String comment = "";
+ boolean error;
+ ArrayList ids = null;
+ String condition = null;
+
+ error = checkSchema(dbc, subsetTableName);
+ if (!error) {
+ if (subsetJournalFileName != null) {
+ try {
+ ids = asList(subsetJournalFileName);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ if (ids.size() == 0) {
+ LOG.error(subsetJournalFileName + " is empty.");
+ error = true;
+ }
+ StringBuilder sb = new StringBuilder();
+ for (String id : ids)
+ sb.append(", ").append(id);
+ condition = Constants.NLM_ID_FIELD_NAME;
+ comment = "Subset created " + new Date().toString() + " by matching with " + superTableName + " on "
+ + condition + ": " + sb.substring(2);
+ } else if (subsetQuery != null) {
+ logMessage("Querying PubMed for: " + subsetQuery);
+ ids = QueryPubMed.query(subsetQuery);
+ if (ids.size() == 0) {
+ LOG.error("No results for your query.");
+ error = true;
+ } else
+ LOG.info("PubMed delivered " + ids.size() + " results.");
+ condition = Constants.PMID_FIELD_NAME;
+ comment = "Subset created " + new Date().toString() + " by matching with " + superTableName
+ + " on PubMed-query: " + subsetQuery;
+ } else if (all4Subset) {
+ logMessage("Creating subset by matching all entries from table " + superTableName + ".");
+ comment = "Subset created " + new Date().toString() + " by matching with " + superTableName;
+ } else if (whereClause != null) {
+ comment = "Subset created " + new Date().toString() + " by selecting rows from " + superTableName
+ + " with where clause \"" + whereClause + "\"";
+ } else if (randomSubsetSize != null) {
+ try {
+ new Integer(randomSubsetSize);
+ comment = "Subset created " + new Date().toString() + " by randomly selecting " + randomSubsetSize
+ + " rows from " + superTableName + ".";
+ } catch (NumberFormatException e) {
+ LOG.error(randomSubsetSize + " is not a number!");
+ error = true;
+ }
+ } else if (fileStr != null) {
+ try {
+ ids = asList(fileStr);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ if (ids.size() == 0) {
+ LOG.error(fileStr + " is empty.");
+ error = true;
+ }
+ condition = dbc.getFieldConfiguration(dbc.getActiveTableSchema()).getPrimaryKey()[0];
+ comment = "Subset created " + new Date().toString() + " by matching with " + superTableName + " on "
+ + ids.size() + " " + condition + "s;";
+ } else if (mirrorSubset) {
+ comment = "Subset created " + new Date().toString() + " as to mirror " + superTableName + ";";
+ } else {
+ error = true;
+ LOG.error("You must choose a way to define the subset.");
+ }
+
+ comment = escapeSingleQuotes(comment);
+ }
+ if (!dbc.withConnectionQueryBoolean(c -> dbc.tableExists(superTableName))) {
+ logMessage("Checking whether super table " + superTableName + " exists...");
+ LOG.error("Table " + superTableName + " doesn't exist!");
+ error = true;
+ }
+ if (!error) {
+ try (CoStoSysConnection connPair = dbc.obtainOrReserveConnection()) {
+ if (!dbc.tableExists(subsetTableName)) {
+ logMessage("No table with the name \"" + subsetTableName + "\" exists, creating new subset table...");
+ dbc.createSubsetTable(subsetTableName, superTableName, numberRefHops, comment);
+ logMessage("Created table " + subsetTableName);
+ } else
+ LOG.error("Table " + subsetTableName + " allready exists.");
+ if (dbc.isEmpty(subsetTableName) && !error) {
+ if (all4Subset)
+ dbc.initSubset(subsetTableName, superTableName);
+ else if (whereClause != null)
+ dbc.initSubsetWithWhereClause(subsetTableName, superTableName, whereClause);
+ else if (ids != null && ids.size() > 0)
+ dbc.initSubset(ids, subsetTableName, superTableName, condition);
+ else if (mirrorSubset)
+ dbc.initMirrorSubset(subsetTableName, superTableName, true);
+ else if (randomSubsetSize != null) {
+ dbc.initRandomSubset(new Integer(randomSubsetSize), subsetTableName, superTableName);
+ }
+ logMessage("Subset defined.");
+ } else {
+ LOG.error(subsetTableName + " is not empty, please use another table.");
+ error = true;
+ }
+ }
+ }
+ return error;
+ }
+
+ private static boolean doImportOrUpdate(DataBaseConnector dbc, String fileStr, String queryStr,
+ String superTableName, boolean updateMode) throws SQLException {
+ boolean error = false;
+ if (fileStr != null) {
+
+ if (!dbc.withConnectionQueryBoolean(c -> c.tableExists(superTableName))) {
+ error = checkSchema(dbc, superTableName);
+ final String comment = "Data table created " + new Date().toString() + " by importing data from path " + fileStr;
+ if (!error) {
+ dbc.withConnectionExecute(c -> c.createTable(superTableName, comment));
+ logMessage("Created table " + superTableName);
+
+ }
+ }
+
+ if (dbc.withConnectionQueryBoolean(c -> c.isEmpty(superTableName)) && !updateMode) {
+ dbc.withConnectionExecute(c -> c.importFromXMLFile(fileStr, superTableName));
+ } else {
+ logMessage("Table is not empty or update mode was explicitly specified, processing Updates.");
+ dbc.withConnectionExecute(c -> c.updateFromXML(fileStr, superTableName));
+ logMessage("Updates finished.");
+ }
+ } else {
+ LOG.error("You must specify a file or directory to retrieve XML files from.");
+ error = true;
+ }
+ return error;
+ }
+
+ private static boolean doQuery(DataBaseConnector dbc, QueryOptions qo) {
+ boolean error = false;
+
+ /**
+ * The document IDs that should be returned (optional)
+ */
+ String queryStr = qo.queryStr;
+ String fileStr = qo.fileStr;
+ String tableName = qo.tableName;
+ String tableSchema = qo.tableSchema;
+ boolean useDelimiter = qo.useDelimiter;
+ boolean pubmedArticleSet = qo.pubmedArticleSet;
+ String xpath = qo.xpath;
+ // this could be a directory or file name, depending on parameters
+ String baseOutFile = qo.baseOutDirStr;
+ String batchSizeStr = qo.batchSizeStr;
+ String limitStr = qo.limitStr;
+ Integer numberRefHops = qo.numberRefHops;
+
+ // In the following algorithm, first of all each possible
+ // parameter/resource is acquired. Further down is then one single
+ // algorithm iterating over queried documents and treating them
+ // accordingly to the parameters which have been found.
+ File outfile = null;
+ int batchSize = 0;
+ BufferedWriter bw = null;
+ boolean keysExplicitlyGiven = fileStr != null || queryStr != null;
+ long limit = limitStr != null ? Integer.parseInt(limitStr) : -1;
+
+ boolean createDirectory = baseOutFile != null && !pubmedArticleSet;
+ if (verbose) {
+ logMessage("Creating " + (createDirectory ? "directory" : "file") + " " + baseOutFile
+ + " to write query results to.");
+ }
+
+ if (createDirectory) {
+ outfile = new File(baseOutFile);
+ if (!outfile.exists()) {
+ logMessage("Directory " + outfile.getAbsolutePath()
+ + " does not exist and will be created (as well as sub dircetories for file batches if required).");
+ outfile.mkdir();
+ }
+ logMessage("Writing queried documents to " + outfile.getAbsolutePath());
+
+ if (batchSizeStr != null) {
+ try {
+ batchSize = Integer.parseInt(batchSizeStr);
+ logMessage("Dividing query result files in batches of " + batchSize);
+ if (batchSize < 1)
+ throw new NumberFormatException();
+ } catch (NumberFormatException e) {
+ LOG.error(
+ "Error parsing \"{}\" into an integer. Please deliver a positive numeric value for the batch size of files.");
+ }
+ }
+ }
+
+ if (!error) {
+ List keys = new ArrayList();
+ if (fileStr != null) {
+ try {
+ keys = asListOfArrays(fileStr);
+ } catch (IOException e1) {
+ LOG.error("Could not open '" + new File(fileStr).getAbsolutePath() + "'.");
+ error = true;
+ }
+ }
+ if (queryStr != null) {
+ for (String pmid : queryStr.split(","))
+ keys.add(pmid.split(KEY_PART_SEPERATOR));
+ }
+
+ // Main algorithm iterating over documents.
+ try {
+ if (!error) {
+ Iterator it;
+ if (!keysExplicitlyGiven) {
+ it = dbc.querySubset(tableName, qo.whereClause, limit, numberRefHops, tableSchema);
+ } else if (keys.size() > 0)
+ it = dbc.retrieveColumnsByTableSchema(keys, tableName, tableSchema);
+ else
+ throw new IllegalStateException(
+ "No query keys have been explicitly given (e.g. in a file) nor should the whole table be queried.");
+ int i = 0;
+ // The name of the sub directories will just be their batch
+ // number. We start at -1 because the batchNumber will be
+ // incremented first of all (0 % x == 0, Ax).
+ int batchNumber = -1;
+ // outDir will be baseOutDir plus the current batch number
+ // of files when
+ // saving the queried files in separate batches is wished.
+ File outDir = outfile;
+
+ if (pubmedArticleSet) {
+ if (null != baseOutFile) {
+ logMessage(
+ "Creating a single file with a PubmedArticleSet and writing it to " + baseOutFile);
+ bw = new BufferedWriter(new FileWriter(baseOutFile));
+ }
+ print("\n"
+ + "\n"
+ + "", bw);
+ }
+
+ while (it.hasNext()) {
+ byte[][] idAndXML = it.next();
+ if (outfile != null) {
+ // if we want batches, create appropriate
+ // subdirectories
+ if (batchSize > 0 && i % batchSize == 0) {
+ ++batchNumber;
+ // Adjust the sub directory for the new batch.
+ String subDirectoryName = (batchNumber > -1 && batchSize > 0
+ ? Integer.toString(batchNumber) : "");
+ String subDirPath = outfile.getAbsolutePath() + FILE_SEPERATOR + subDirectoryName;
+ outDir = new File(subDirPath);
+ outDir.mkdir();
+ }
+
+ // Write the current file into the given directory
+ // and use the key as file name
+ String filename = new String(idAndXML[0]);
+
+ if (!pubmedArticleSet) {
+ if (bw != null)
+ bw.close();
+ bw = new BufferedWriter(new FileWriter(outDir + FILE_SEPERATOR + filename));
+ }
+ }
+ if (xpath == null) {
+ StringBuilder sb = new StringBuilder();
+ if (pubmedArticleSet)
+ sb.append("\n");
+ sb.append(new String(idAndXML[1], "UTF-8"));
+ if (pubmedArticleSet)
+ sb.append("\n ");
+ print(sb.toString(), bw);
+ } else {
+ // 'values' contains for each XPath delivered one
+ // array of Strings holding the values for this
+ // XPath (e.g. the AuthorList mostly yields several
+ // values).
+ String[][] values = getXpathValues(idAndXML[1], xpath);
+ for (String[] valuesOfXpath : values)
+ for (String singleValue : valuesOfXpath)
+ print(singleValue, bw);
+ }
+ if (useDelimiter)
+ System.out.println(DELIMITER);
+ ++i;
+
+ }
+
+ if (pubmedArticleSet) {
+ print(" ", bw);
+ }
+ }
+ } catch (IOException e) {
+ e.printStackTrace();
+ } catch (SQLException e) {
+ e.printStackTrace();
+ } finally {
+ try {
+ if (bw != null)
+ bw.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+
+ return error;
+ }
+
+ /**
+ * @param string
+ * @param bw
+ * @throws IOException
+ */
+ private static void print(String string, BufferedWriter bw) throws IOException {
+ if (bw == null)
+ System.out.println(string);
+ else
+ bw.write(string + "\n");
+ }
+
+ private static String[][] getXpathValues(byte[] next, String xpaths) {
+
+ String[] xpathArray = xpaths.split(",");
+ List> fields = new ArrayList>();
+ for (String xpath : xpathArray) {
+ Map field = new HashMap();
+ field.put(JulieXMLConstants.NAME, xpath);
+ field.put(JulieXMLConstants.XPATH, xpath);
+ field.put(JulieXMLConstants.RETURN_XML_FRAGMENT, "true");
+ field.put(JulieXMLConstants.RETURN_ARRAY, "true");
+ fields.add(field);
+ }
+
+ String[][] retStrings = new String[xpathArray.length][];
+
+ Iterator> it = JulieXMLTools.constructRowIterator(next, 1024, ".", fields, "your result");
+ if (it.hasNext()) {
+ Map row = it.next();
+ for (int i = 0; i < xpathArray.length; i++) {
+ // Get the field "xpath" which was given as field name above; we
+ // wanted multiple results to be returned in an array.
+ String[] values = (String[]) row.get(xpathArray[i]);
+ if (values == null)
+ values = new String[]{"XPath " + xpaths + " does not exist in this document."};
+ retStrings[i] = values;
+ }
+ if (it.hasNext()) {
+ // What happened? We wanted all values in one array, so this
+ // should not happen.
+ LOG.warn(
+ "There are more results for the XPath {} than expected and not all have been returned. Please contact a developer for help.",
+ xpaths);
+ }
+ }
+ return retStrings;
+ }
+
+ private static void printHelp(Options options) {
+ HelpFormatter formatter = new HelpFormatter();
+ formatter.setWidth(160);
+ formatter.printHelp(CLI.class.getName(), options);
+ }
+
+ private static Options getOptions() {
+ Options options = new Options();
+
+ // -------------------- OptionGroup for available modes --------------
+ OptionGroup modes = new OptionGroup();
+
+ modes.addOption(buildOption("i", "import", "Import data into the _data table", "file/dir to import"));
+ modes.addOption(buildOption("im", "importmedline", "Import PubMed/MEDLINE data into the _data table. The parameter is a XML file holding information about the PubMed/MEDLINE baseline location. It is the same file format used for the -um mode.", "XML MEDLINE configuration"));
+ modes.addOption(buildOption("u", "update", "Update _data table", "file/dir to update from"));
+ modes.addOption(buildOption("um", "updatemedline", "Update _data table from PubMed/MEDLINE update files. Keeps track of already applied update files via an internal table. The parameter is a XML file holding information about the update file location. It is the same file format used for the -im mode.", "XML MEDLINE configuration"));
+ modes.addOption(buildOption("s", "subset",
+ "Define a subset table; use -f, -o, -a, -m, -w or -r to specify the subsets source.",
+ "name of the new subset table"));
+ modes.addOption(buildOption("re", "reset",
+ "Resets a subset table to a not-yet-processed state. Flags:\n" + "-np only reset non-processed items\n"
+ + "-ne only reset items without errors\n"
+ + "-lc to reset only those items with the given last component\n"
+ + "-f a partial reset can be achieved by specifying a file containing one primary key value for each document to be reset",
+ "subset table name"));
+ modes.addOption(
+ buildOption("st", "status", "Show the processing status of a subset table. Generates a small report containing the number of processed and total documents of a subset table. " +
+ "The report can be customized using the -he, -isp, -inp, -to and -slc switches", "subset table name"));
+
+ OptionBuilder.withLongOpt("query");
+ OptionBuilder.withDescription("Query a table (default: " + Constants.DEFAULT_DATA_TABLE_NAME
+ + ") for XMLs. You can enter the primary keys directly or use -f to specify a file. If you define none of both, the whole table will be returned.\n"
+ + "Use -d to display delimiters between the results.\n"
+ + "Use -z to specify the target table. If the table is a subset, only documents in this subset will be returned.\n"
+ + "Use -l to set a limit of returned documents.\n"
+ + "Use -x to specify an XPath expression to extract specific parts of the queried XML documents.\n"
+ + "Use -out to save the query results to file.");
+ OptionBuilder.hasOptionalArg();
+ OptionBuilder.withArgName("your query");
+ modes.addOption(OptionBuilder.create("q"));
+
+ modes.addOption(buildOption("h", "help", "Displays all possible parameters."));
+ modes.addOption(buildOption("t", "tables", "Displays all tables in the active scheme."));
+
+ modes.addOption(buildOption("td", "tabledefinition", "Displays the columns of a table.", "the table"));
+
+ modes.addOption(buildOption("ds", "displayscheme", "Displays the active scheme."));
+ modes.addOption(buildOption("ch", "check",
+ "Checks if a table confirms to its definition (for subsets: only primary keys!)", "table"));
+ modes.addOption(buildOption("dc", "defaultconfig", "Prints the defaultConfiguration."));
+ modes.addOption(buildOption("dt", "droptable", "Drops the given table.", "table"));
+
+ modes.addOption(buildOption("lts", "listtableschemas",
+ "Displays all table schema names in the configuration. The shown name index can be used as value for the -ts option."));
+
+ modes.setRequired(true);
+
+ options.addOptionGroup(modes);
+
+ // -------------------- OptionGroup for exclusive parameters--------------
+ OptionGroup exclusive = new OptionGroup();
+
+ exclusive.addOption(buildOption("f", "file",
+ "Set the file used for query, subset creation or partial subset reset.", "file"));
+ exclusive.addOption(buildOption("o", "online",
+ "Defines the subset by a PubMed query - remember to wrap it in double quotation marks!", "query"));
+ exclusive.addOption(buildOption("a", "all", "Use all entries of the _data table for the subset."));
+ exclusive.addOption(buildOption("r", "random",
+ "Generates a random subset, you must provide its size as a parameter. Often used with -z.", "size"));
+ exclusive.addOption(buildOption("m", "mirror",
+ "Creates a subset table which mirrors the database table. I.e. when the data table gets new records, the mirror subset(s) will be updated accordingly."));
+ exclusive
+ .addOption(buildOption("w", "where", "Uses a SQL WHERE clause during subset definition.", "condition"));
+ exclusive.addOption(
+ buildOption("j", "journals", "Define a subset by providing a file with journal names.", "file"));
+ exclusive.addOption(
+ buildOption("l", "limit", "For use with -q. Restricts the number of documents returned.", "limit"));
+
+
+ options.addOption(buildOption("he", "has errors",
+ "Flag for -st(atus) mode to add the 'has errors' statistic to a subset status report."));
+ options.addOption(buildOption("isp", "is processed",
+ "Flag for -st(atus) mode to add the 'is processed' statistic to a subset status report."));
+ options.addOption(buildOption("inp", "is in process",
+ "Flag for -st(atus) mode to add the 'is in process' statistic to a subset status report."));
+ options.addOption(buildOption("to", "total",
+ "Flag for -st(atus) mode to add the 'total' statistic to a subset status report."));
+ options.addOption(buildOption("slc", "show last component",
+ "Flag for -st(atus) mode to add the 'last component' statistic to a subset status report."));
+
+
+ options.addOption(buildOption("np", "not processed",
+ "Flag for -re(set) mode to restrict to non-processed table rows. May be combined with -ne, -lc."));
+ options.addOption(buildOption("ne", "no errors",
+ "Flag for -re(set) mode to restrict to table rows without errors. May be combined with -np, -lc."));
+ options.addOption(buildOption("lc", "last component",
+ "Option for -re(set) mode to restrict to table rows to a given last component identifier. May be combined with -np, -ne.",
+ "component name"));
+
+ options.addOptionGroup(exclusive);
+
+ // --------------- optional details for many modes --------------
+ options.addOption(buildOption("z", "superset",
+ "Provides a superset name for definition of a subset or the name of a data table.",
+ "name of the superset table"));
+ options.addOption(buildOption("v", "verbose", "Activate verbose informational output of the tool's actions"));
+
+ options.addOption(buildOption("d", "delimiter", "Display a line of \"-\" as delimiter between the results."));
+ options.addOption(buildOption("pas", "pubmedarticleset",
+ "For use with -q. The queried documents will be interpreted as Medline XML documents and will be enclosed in PubmedArticleSet."));
+ options.addOption(buildOption("out", "out",
+ "The file or directory where query results are written to. By default, a directory will be created and it will be filled with one file per document. The files will have the name of their database primary key. Modifying parameters:\n"
+ + "Use -bs to create subdirectories for batches of files.\n"
+ + "Use -pas to create no directory but a single XML file representing a PubmedArticleSet. This assumes that the queried documents are Medline or Pubmed XML documents.",
+ "output directory"));
+ options.addOption(buildOption("bs", "batchsize",
+ "The number of queried documents (by -q and -out) which should be written in one directory. Subdirectories will be created at need.",
+ "batchsize"));
+ options.addOption(buildOption("x", "xpath",
+ "When querying documents using -q, you may specify one or more XPath expressions to restrict the output to the elements referenced by your XPath expressions. Several XPaths must be delimited by a single comma.",
+ "xpath"));
+ options.addOption(buildOption("rh", "referencehops",
+ "The maximum number of allowed hops to tables referenced with a foreign key when creating subset tables.",
+ "max number of hops"));
+ options.addOption(buildOption("ts", "tableschema",
+ "Table Schema to use; currently only supported by -q mode. The name can be given or the index as retrieved by the -lts mode.",
+ "schemaname"));
+
+ // -------------------- authentication --------------------
+ options.addOption(buildOption("U", "url",
+ "URL to database server (e.g. jdbc:postgresql://<host>/<database>)", "url"));
+ options.addOption(buildOption("n", "username", "username for database", "username"));
+ options.addOption(buildOption("p", "pass", "password for database", "password"));
+ options.addOption(buildOption("pgs", "pgschema", "Postgres Schema to use", "schema"));
+ options.addOption(buildOption("srv", "server", "Server name to connect to", "servername"));
+ options.addOption(buildOption("db", "database", "Database to connect to", "database"));
+ options.addOption(buildOption("dbc", "databaseconfiguration",
+ "XML file specifying the user configuration (defaults to dbcConfiguration.xml).", "Config File"));
+
+ return options;
+ }
+
+ private static Option buildOption(String shortName, String longName, String description, String... arguments) {
+ OptionBuilder.withLongOpt(longName);
+ OptionBuilder.withDescription(description);
+ OptionBuilder.hasArgs(arguments.length);
+ for (String argument : arguments)
+ OptionBuilder.withArgName(argument);
+ return OptionBuilder.create(shortName);
+ }
+
+ /**
+ * @param dbc - databaseconnector
+ * @param tableName - name of the table to check
+ * @return true - if there was an error, otherwise false
+ */
+ private static boolean checkSchema(DataBaseConnector dbc, String tableName) {
+ boolean error = false;
+ String[] tablePath = tableName.split("\\.");
+ // if the table name has the form 'schemaname.tablename'
+ if (tablePath.length == 2 && !dbc.withConnectionQueryBoolean(c -> c.schemaExists(tablePath[0])))
+ dbc.createSchema(tablePath[0]);
+ else if (tablePath.length > 2) {
+ LOG.error(String.format(
+ "The table path %s is invalid. Only table names of the form 'tablename' or 'schemaname.tablename' are accepted.",
+ tableName));
+
+ }
+ return error;
+ }
+
+ private static String escapeSingleQuotes(String comment) {
+ return comment.replaceAll("'", "\\\\'");
+
+ }
+
+ private static List<String[]> asListOfArrays(String fileStr) throws IOException {
+ List<String[]> list = new ArrayList<String[]>();
+ File file = new File(fileStr);
+ if (file != null) {
+ try (BufferedReader br = new BufferedReader(new FileReader(file))) {
+ String line = br.readLine();
+ while (line != null) {
+ list.add(line.split(KEY_PART_SEPERATOR));
+ line = br.readLine();
+ }
+ }
+ }
+ return list;
+ }
+
+ private static ArrayList<String> asList(String fileStr) throws IOException {
+ ArrayList<String> list = new ArrayList<String>();
+ File file = new File(fileStr);
+ if (file != null) {
+ try (BufferedReader br = new BufferedReader(new FileReader(file))) {
+ String line = br.readLine();
+ while (line != null) {
+ list.add(line);
+ line = br.readLine();
+ }
+ }
+ }
+ return list;
+ }
+
+ private enum Mode {
+ IMPORT, QUERY, SUBSET, RESET, STATUS, ERROR, TABLES, LIST_TABLE_SCHEMAS, TABLE_DEFINITION, SCHEME, CHECK, DEFAULT_CONFIG, DROP_TABLE, UPDATE_MEDLINE
+ }
+
+ public static XMLConfiguration loadXmlConfiguration(File configurationFile) throws ConfigurationException {
+ try {
+ Parameters params = new Parameters();
+ FileBasedConfigurationBuilder<XMLConfiguration> configBuilder =
+ new FileBasedConfigurationBuilder<>(XMLConfiguration.class).configure(params
+ .xml()
+ .setFile(configurationFile));
+ return configBuilder.getConfiguration();
+ } catch (org.apache.commons.configuration2.ex.ConfigurationException e) {
+ throw new ConfigurationException(e);
+ }
+ }
+}
diff --git a/src/main/java/de/julielab/costosys/cli/ConfigurationNotFoundException.java b/src/main/java/de/julielab/costosys/cli/ConfigurationNotFoundException.java
new file mode 100644
index 0000000..044fc4a
--- /dev/null
+++ b/src/main/java/de/julielab/costosys/cli/ConfigurationNotFoundException.java
@@ -0,0 +1,22 @@
+package de.julielab.costosys.cli;
+
+public class ConfigurationNotFoundException extends Exception {
+ public ConfigurationNotFoundException() {
+ }
+
+ public ConfigurationNotFoundException(String message) {
+ super(message);
+ }
+
+ public ConfigurationNotFoundException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ public ConfigurationNotFoundException(Throwable cause) {
+ super(cause);
+ }
+
+ public ConfigurationNotFoundException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) {
+ super(message, cause, enableSuppression, writableStackTrace);
+ }
+}
diff --git a/src/main/java/de/julielab/costosys/cli/ExtractDeleteCitations.java b/src/main/java/de/julielab/costosys/cli/ExtractDeleteCitations.java
new file mode 100644
index 0000000..0b77a2c
--- /dev/null
+++ b/src/main/java/de/julielab/costosys/cli/ExtractDeleteCitations.java
@@ -0,0 +1,85 @@
+/**
+ * ExtractDeleteCitations.java
+ *
+ * Copyright (c) 2010, JULIE Lab.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Common Public License v1.0
+ *
+ * Author: chew
+ *
+ * Current version: 1.0
+ * Since version: 1.0
+ *
+ * Creation date: 14.12.2010
+ **/
+
+package de.julielab.costosys.cli;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import de.julielab.costosys.Constants;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import de.julielab.xml.JulieXMLConstants;
+import de.julielab.xml.JulieXMLTools;
+
+/**
+ * Extracts PMIDs of deleted Medline documents from Medline Update XML batches.
+ * Currently the path to the XML files is hard coded, this should be made more
+ * flexible.
+ *
+ * @author faessler
+ */
+public class ExtractDeleteCitations {
+
+ private static final Logger LOG = LoggerFactory
+ .getLogger(ExtractDeleteCitations.class);
+
+ public static void main(String[] args) {
+ extractDeletedPMIDs();
+ }
+
+ private static void extractDeletedPMIDs() {
+ LOG.info("Starting extraction...");
+ File baseDir = new File("/data/data_corpora/medline/updates");
+ if (!baseDir.isDirectory()) {
+ LOG.error(String.format(
+ "Path %s does not point to a directory.",
+ baseDir.getAbsolutePath()));
+ System.exit(1);
+ }
+ String[] fileNames = baseDir.list(new FilenameFilter() {
+ public boolean accept(File arg0, String arg1) {
+ return arg1.endsWith(".gz");
+ }
+ });
+
+ String forEachXpath = "/MedlineCitationSet/DeleteCitation/PMID";
+ List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
+ Map<String, String> field = new HashMap<String, String>();
+ field.put(JulieXMLConstants.NAME, Constants.PMID_FIELD_NAME);
+ field.put(JulieXMLConstants.XPATH,
+ "/MedlineCitationSet/DeleteCitation/PMID");
+ fields.add(field);
+
+ int bufferSize = 1000;
+ for (String fileName : fileNames) {
+ Iterator<Map<String, String>> it = JulieXMLTools.constructRowIterator(
+ baseDir.getAbsolutePath() + "/" + fileName, bufferSize,
+ forEachXpath, fields, false);
+
+ while (it.hasNext()) {
+ Map<String, String> row = it.next();
+ String pmid = (String) row.get(Constants.PMID_FIELD_NAME);
+ System.out.println(pmid);
+ }
+ }
+ }
+}
diff --git a/src/main/java/de/julielab/costosys/cli/QueryOptions.java b/src/main/java/de/julielab/costosys/cli/QueryOptions.java
new file mode 100644
index 0000000..85b33c4
--- /dev/null
+++ b/src/main/java/de/julielab/costosys/cli/QueryOptions.java
@@ -0,0 +1,26 @@
+package de.julielab.costosys.cli;
+
+public class QueryOptions {
+
+ public String queryStr;
+ public String fileStr;
+ public String tableName;
+ public boolean useDelimiter;
+ public String xpath;
+ public String baseOutDirStr;
+ public String batchSizeStr;
+ public String limitStr;
+ public String whereClause;
+ public Integer numberRefHops;
+ public boolean pubmedArticleSet;
+ public String tableSchema;
+ @Override
+ public String toString() {
+ return "QueryOptions [queryStr=" + queryStr + ", fileStr=" + fileStr + ", tableName=" + tableName
+ + ", useDelimiter=" + useDelimiter + ", xpath=" + xpath + ", baseOutDirStr=" + baseOutDirStr
+ + ", batchSizeStr=" + batchSizeStr + ", limitStr=" + limitStr + ", whereClause=" + whereClause
+ + ", numberRefHops=" + numberRefHops + ", pubmedArticleSet=" + pubmedArticleSet + "]";
+ }
+
+
+}
diff --git a/src/main/java/de/julielab/costosys/cli/QueryPubMed.java b/src/main/java/de/julielab/costosys/cli/QueryPubMed.java
new file mode 100644
index 0000000..6b5b452
--- /dev/null
+++ b/src/main/java/de/julielab/costosys/cli/QueryPubMed.java
@@ -0,0 +1,85 @@
+package de.julielab.costosys.cli;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.util.ArrayList;
+
+import com.ximpleware.AutoPilot;
+import com.ximpleware.EOFException;
+import com.ximpleware.EncodingException;
+import com.ximpleware.EntityException;
+import com.ximpleware.NavException;
+import com.ximpleware.ParseException;
+import com.ximpleware.VTDGen;
+import com.ximpleware.VTDNav;
+import com.ximpleware.XPathEvalException;
+import com.ximpleware.XPathParseException;
+
+import de.julielab.xml.JulieXMLTools;
+
+public class QueryPubMed {
+ private final static String SITE = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi";
+ private final static String RETMAX = "100000000"; // 5x the size of PubMed (2011)
+ private final static int BUFFERSIZE = 1024;
+ private final static String XPATH = "/eSearchResult/IdList/Id";
+
+ /**
+ * Query PubMed via REST-API, returning up to 10e8 matched PMIDs.
+ * Queried terms get expanded, e.g. "Il-1" will match "interleukin-1".
+ * Searches with really many results (e.g. "cancer") need increased heap space!
+ * More details: http://eutils.ncbi.nlm.nih.gov/corehtml/query/static/esearch_help.html
+ *
+ * @param query -Query for PubMed as a String
+ * @return - ArrayList<String>, containing PMIDs as Strings
+ */
+ public static ArrayList<String> query(String query) {
+ ArrayList<String> ids = new ArrayList<String>();
+ try {
+ StringBuilder queryBuilder = new StringBuilder();
+ queryBuilder.append(SITE)
+ .append("?term=").append(URLEncoder.encode(query, "UTF-8"))
+ .append("&retmax=").append(RETMAX).append("&tool=julie-medline-manager")
+ .append("&email=julielab@listserv.uni-jena.de");
+ URL url = new URL(queryBuilder.toString());
+ InputStream stream = url.openStream();
+
+ VTDGen vg = new VTDGen(); // Parses XML
+ vg.setDoc(JulieXMLTools.readStream(stream, BUFFERSIZE));
+ vg.parse(true);
+ VTDNav vn = vg.getNav(); // Navigates in parsed XML
+ AutoPilot ap = new AutoPilot(vn); // Moves through whole XML
+
+ ap.selectXPath(XPATH);
+ while (ap.evalXPath() != -1) {
+ // 32 bits encoding length, 32 bits encoding offset
+ long fragment = vn.getContentFragment();
+ // right 32 bits
+ int offset = (int) fragment;
+ // left 32 bits, casts priority is higher than right-shifts
+ int length = (int) (fragment >> 32);
+ ids.add(vn.toString(offset, length));
+ }
+
+ } catch (IOException e) {
+ e.printStackTrace();
+ } catch (EncodingException e) {
+ e.printStackTrace();
+ } catch (EOFException e) {
+ e.printStackTrace();
+ } catch (EntityException e) {
+ e.printStackTrace();
+ } catch (ParseException e) {
+ e.printStackTrace();
+ } catch (XPathParseException e) {
+ e.printStackTrace();
+ } catch (XPathEvalException e) {
+ e.printStackTrace();
+ } catch (NavException e) {
+ e.printStackTrace();
+ }
+
+ return ids;
+ }
+}
\ No newline at end of file
diff --git a/src/main/java/de/julielab/costosys/cli/TableNotFoundException.java b/src/main/java/de/julielab/costosys/cli/TableNotFoundException.java
new file mode 100644
index 0000000..d62fc2d
--- /dev/null
+++ b/src/main/java/de/julielab/costosys/cli/TableNotFoundException.java
@@ -0,0 +1,16 @@
+package de.julielab.costosys.cli;
+
+public class TableNotFoundException extends Exception {
+ /**
+ *
+ */
+ private static final long serialVersionUID = -4868103716490551991L;
+
+ public TableNotFoundException() {
+ super();
+ }
+
+ public TableNotFoundException(String message) {
+ super(message);
+ }
+}
diff --git a/src/main/java/de/julielab/costosys/cli/Test.java b/src/main/java/de/julielab/costosys/cli/Test.java
new file mode 100644
index 0000000..57cf2a3
--- /dev/null
+++ b/src/main/java/de/julielab/costosys/cli/Test.java
@@ -0,0 +1,20 @@
+package de.julielab.costosys.cli;
+
+import java.io.*;
+import java.nio.file.FileSystem;
+import java.nio.file.FileSystems;
+import java.nio.file.Paths;
+import java.nio.file.StandardOpenOption;
+
+public class Test {
+ public static void main(String args[]) throws IOException {
+ try (FileSystem fs = FileSystems.newFileSystem(Paths.get("myfs.zip"), null)) {
+
+ OutputStream os = fs.provider().newOutputStream(fs.getPath("mow/entry.txt"), StandardOpenOption.CREATE, StandardOpenOption.WRITE);
+ try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(os))) {
+ bw.write("Here is content!");
+ }
+ }
+ System.out.println("Done");
+ }
+}
diff --git a/src/main/java/de/julielab/xmlData/config/ConfigBase.java b/src/main/java/de/julielab/costosys/configuration/ConfigBase.java
similarity index 97%
rename from src/main/java/de/julielab/xmlData/config/ConfigBase.java
rename to src/main/java/de/julielab/costosys/configuration/ConfigBase.java
index ccffaf4..02c0148 100644
--- a/src/main/java/de/julielab/xmlData/config/ConfigBase.java
+++ b/src/main/java/de/julielab/costosys/configuration/ConfigBase.java
@@ -1,4 +1,4 @@
-package de.julielab.xmlData.config;
+package de.julielab.costosys.configuration;
import java.io.IOException;
@@ -11,7 +11,7 @@
import com.ximpleware.VTDNav;
import com.ximpleware.XPathParseException;
-import de.julielab.xmlData.dataBase.DataBaseConnector;
+import de.julielab.costosys.dbconnection.DataBaseConnector;
diff --git a/src/main/java/de/julielab/costosys/configuration/ConfigReader.java b/src/main/java/de/julielab/costosys/configuration/ConfigReader.java
new file mode 100644
index 0000000..3c88dd1
--- /dev/null
+++ b/src/main/java/de/julielab/costosys/configuration/ConfigReader.java
@@ -0,0 +1,452 @@
+/**
+ * ConfigurationParser.java
+ *
+ * Copyright (c) 2011, JULIE Lab.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Common Public License v1.0
+ *
+ * Author: faessler/hellrich
+ *
+ * Current version: 1.0
+ * Since version: 1.0
+ *
+ * Creation date: 22.03.2011
+ **/
+
+package de.julielab.costosys.configuration;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.*;
+
+import org.apache.commons.io.IOUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.ximpleware.AutoPilot;
+import com.ximpleware.NavException;
+import com.ximpleware.VTDException;
+import com.ximpleware.VTDGen;
+import com.ximpleware.VTDNav;
+import com.ximpleware.XMLModifier;
+import com.ximpleware.XPathEvalException;
+import com.ximpleware.XPathParseException;
+
+import de.julielab.xml.JulieXMLTools;
+
+/**
+ * This class reads an xml configuration file, containing the definition of a
+ * database connection and the fields used in the database. It provides those
+ * definitions as specialized objects.
+ *
+ * @author hellrich
+ */
+public class ConfigReader {
+
+ private static final Logger LOG = LoggerFactory
+ .getLogger(ConfigReader.class);
+ private final static int BUFFER_SIZE = 1000;
+ public static final String DEFAULT_DEFINITION = "/defaultConfiguration.xml";
+
+ public static final String XPATH_ACTIVE_TABLE_SCHEMA = "//activeTableSchema";
+ public static final String XPATH_ACTIVE_DB = "//activeDBConnection";
+ public static final String XPATH_ACTIVE_PG_SCHEMA = "//activePostgresSchema";
+ public static final String XPATH_MAX_CONNS = "//maxActiveDBConnections";
+
+ public static final String XPATH_CONF_DBS = "//DBConnections";
+ public static final String XPATH_CONF_SCHEMAS = "//tableSchemas";
+
+ public static final String XPATH_CONF_DB = "//DBConnection";
+ public static final String XPATH_CONF_SCHEMA = "//tableSchema";
+
+ private static final int INDEX_SCHEMA = 0;
+ private static final int INDEX_DB = 1;
+ private static final int INDEX_PG_SCHEMA = 2;
+ private static final int INDEX_MAX_CONNS = 3;
+ private static final int INDEX_DATA_TABLE = 4;
+ private static final int INDEX_DATA_SCHEMA = 5;
+
+ private static final String XPATH_DATA_TABLE = "//dataTable";
+ private static final String XPATH_DATA_SCHEMA = "//activeDataPostgresSchema";
+ private static final String ATTRIBUTE_NAME = "name";
+
+ private FieldConfigurationManager fieldConfigs;
+ private DBConfig dbConf;
+ private String activeDataTable;
+ private String activeSchemaName;
+ private byte[] mergedConfigData;
+ private String activeDataSchema;
+ private List<String> schemaNames;
+
+ public ConfigReader(InputStream def) {
+ try {
+ byte[] defaultConfData = null;
+ byte[] userConfData = null;
+
+ InputStream is = getClass().getResourceAsStream(DEFAULT_DEFINITION);
+ defaultConfData = IOUtils.toByteArray(is);
+ is.close();
+ // check if the user gave a table schema definition;
+ // if not, the default values will be used
+ if (def != null) {
+ userConfData = IOUtils.toByteArray(def);
+ def.close();
+ }
+ mergedConfigData = mergeConfigData(defaultConfData, userConfData);
+
+ schemaNames = getAllSchemaNames(mergedConfigData);
+
+ // Creating
+ fieldConfigs = new FieldConfigurationManager();
+ for (String schemaName : schemaNames)
+ fieldConfigs.put(schemaName, new FieldConfig(mergedConfigData,
+ schemaName));
+ dbConf = new DBConfig(mergedConfigData);
+ activeDataTable = ConfigBase.getActiveConfig(mergedConfigData,
+ XPATH_DATA_TABLE);
+ activeDataSchema = ConfigBase.getActiveConfig(mergedConfigData,
+ XPATH_DATA_SCHEMA);
+ activeSchemaName = ConfigBase.getActiveConfig(mergedConfigData,
+ XPATH_ACTIVE_TABLE_SCHEMA);
+
+ LOG.debug("Active data table: {}", activeDataTable);
+ LOG.debug("Active Postgres data schema: {}", activeDataSchema);
+ LOG.debug("Active table schema: {}", activeSchemaName);
+ } catch (IOException e) {
+ e.printStackTrace();
+ } catch (VTDException e) {
+ LOG.error("Parsing of configuration file failed:", e);
+ }
+
+ }
+
+ /**
+ * @param mergedConfigData
+ * @return
+ */
+ private List<String> getAllSchemaNames(byte[] mergedConfigData)
+ throws VTDException {
+ List<String> schemaNames = new ArrayList<String>();
+
+ VTDGen vg = new VTDGen();
+ vg.setDoc(mergedConfigData);
+ vg.parse(true);
+ VTDNav vn = vg.getNav();
+ // Navigates through schema elements
+ AutoPilot schemaAP = new AutoPilot(vn);
+ schemaAP.selectXPath(XPATH_CONF_SCHEMA);
+ // Returns the name attribute value for current schema, navigated to by
+ // schemaAP.
+ AutoPilot schemaNameAP = new AutoPilot(vn);
+ schemaNameAP.selectXPath("@name");
+
+ while (schemaAP.evalXPath() != -1)
+ schemaNames.add(schemaNameAP.evalXPathToString());
+
+ return schemaNames;
+ }
+
+ /**
+ * Inserts the user schemes into the default configuration. This makes all
+ * data available in one place, which is useful for referencing default
+ * values from within a user configuration.
+ *
+ * @param defaultConfData
+ * - prepared default configuration file
+ * @param userConfData
+ * - prepared user specific configuration file
+ * @return - the merged configuration
+ * @throws VTDException
+ * @throws IOException
+ */
+ protected static byte[] mergeConfigData(byte[] defaultConfData,
+ byte[] userConfData) throws VTDException, IOException {
+ VTDGen vg = new VTDGen();
+ vg.setDoc(defaultConfData);
+ vg.parse(true);
+ VTDNav vn = vg.getNav();
+ AutoPilot ap = new AutoPilot(vn);
+
+ if (userConfData == null) {
+ return defaultConfData;
+ //throw new IllegalArgumentException("No CoStoSys user configuration was passed.");
+ }
+
+ XMLModifier xm = new XMLModifier(vn);
+
+ // Get user defined schema and DB connection data.
+ byte[][] userDefs = extractConfigData(userConfData);
+
+ // Add schema data to the default configuration.
+ if (userDefs[INDEX_SCHEMA] != null) {
+ ap.selectXPath(XPATH_CONF_SCHEMA);
+ if (ap.evalXPath() != -1) {
+ xm.insertAfterElement(userDefs[INDEX_SCHEMA]);
+ }
+ }
+
+ // Add DB connection data to the default configuration.
+ if (userDefs[INDEX_DB] != null) {
+ ap.selectXPath(XPATH_CONF_DB);
+ if (ap.evalXPath() != -1) {
+ xm.insertAfterElement(userDefs[INDEX_DB]);
+ }
+ }
+
+ // Get active table schema, active data postgres schema, active DB
+ // connection and more
+ // from the user configuration, if these declarations exist.
+ String[] activeConfs = getActiveConfigurations(userConfData);
+ LOG.debug("Found the following active configurations in the user data: {}", Arrays.toString(activeConfs));
+
+ // Insert the active configurations into the merged configuration, thus
+ // overwriting the defaults.
+ if (activeConfs[INDEX_SCHEMA].length() > 0) {
+ int newTextIndex = JulieXMLTools.setElementText(vn, ap, xm,
+ XPATH_ACTIVE_TABLE_SCHEMA, activeConfs[INDEX_SCHEMA]);
+ LOG.trace("Set the active table schema to {}. Returned new index: {}", activeConfs[INDEX_SCHEMA], newTextIndex);
+ if (newTextIndex == -1) {
+ throw new IllegalStateException(
+ "There is no active table schema defined. Please define an active table schema in your user " +
+ " configuration. The user configuration is: " + new String(userConfData, StandardCharsets.UTF_8));
+ }
+ }
+ if (activeConfs[INDEX_DB].length() > 0) {
+ int newTextIndex = JulieXMLTools.setElementText(vn, ap, xm,
+ XPATH_ACTIVE_DB, activeConfs[INDEX_DB]);
+ if (newTextIndex == -1) {
+// throw new IllegalStateException(
+// "Unexpected error: The default configuration does not define an active database connection. Please define an active DB connection in your user configuration.");
+ LOG.warn("The default configuration does not define an active database connection.");
+ }
+
+ }
+ if (activeConfs[INDEX_PG_SCHEMA].length() > 0) {
+ int newTextIndex = JulieXMLTools.setElementText(vn, ap, xm,
+ XPATH_ACTIVE_PG_SCHEMA, activeConfs[INDEX_PG_SCHEMA]);
+ if (newTextIndex == -1)
+ throw new IllegalStateException(
+ "Unexpected error: The default configuration does not define an active Postgres schema. Please define an active Postgres schema in your user configuration.");
+ }
+ if (activeConfs[INDEX_MAX_CONNS].length() > 0) {
+ int newTextIndex = JulieXMLTools.setElementText(vn, ap, xm,
+ XPATH_MAX_CONNS, activeConfs[INDEX_MAX_CONNS]);
+ if (newTextIndex == -1) {
+// throw new IllegalStateException(
+// "Unexpected error: The default configuration does not define a maximal number of database connections. Please define a maximal number of connections in your user configuration.");
+ LOG.warn("Unexpected error: The default configuration does not define a maximal number of database connections");
+ }
+ }
+ if (activeConfs[INDEX_DATA_TABLE].length() > 0) {
+ int newTextIndex = JulieXMLTools.setElementText(vn, ap, xm,
+ XPATH_DATA_TABLE, activeConfs[INDEX_DATA_TABLE]);
+ if (newTextIndex == -1)
+ throw new IllegalStateException(
+ "Unexpected error: The default configuration does not define a _data table. Please define a _data table in your user configuration.");
+ }
+
+ if (activeConfs[INDEX_DATA_SCHEMA].length() > 0) {
+ int newTextIndex = JulieXMLTools.setElementText(vn, ap, xm,
+ XPATH_DATA_SCHEMA, activeConfs[INDEX_DATA_SCHEMA]);
+ if (newTextIndex == -1)
+ throw new IllegalStateException(
+ "Unexpected error: The default configuration does not define an active data Postgres schema. Please define a data postgres schema in your user configuration.");
+ }
+
+ // Test validity of merged xml (no doublets)
+ vn = xm.outputAndReparse();
+
+ String doublet = getDoublet(vn, XPATH_CONF_SCHEMA);
+ if (doublet != null)
+ throw new IllegalStateException(
+ "Unexpected error: You may not define "
+ + doublet
+ + "as this schema is already defined in the default configuration!");
+
+ doublet = getDoublet(vn, XPATH_CONF_DB);
+ if (doublet != null)
+ throw new IllegalStateException(
+ "Unexpected error: You may not define "
+ + doublet
+ + "as this connection is already defined in the default configuration!");
+
+ // Return the merged configuration data.
+ ByteArrayOutputStream os = new ByteArrayOutputStream();
+ xm.output(os);
+ return os.toByteArray();
+ }
+
+ private static String getDoublet(VTDNav vn, String xpath) {
+ String doublet = "";
+ AutoPilot ap = new AutoPilot(vn);
+ try {
+ ap.selectXPath(xpath);
+ int index = ap.evalXPath();
+ String name = null;
+ Set<String> found = new HashSet<String>();
+ while (index != -1) {
+ int attrIndex = vn.getAttrVal(ATTRIBUTE_NAME);
+ if (attrIndex != -1) {
+ name = vn.toString(attrIndex);
+ if (found.contains(name))
+ doublet = doublet.concat(name).concat(", ");
+ else
+ found.add(name);
+ }
+ index = ap.evalXPath();
+ }
+ } catch (XPathParseException e) {
+ e.printStackTrace();
+ } catch (XPathEvalException e) {
+ e.printStackTrace();
+ } catch (NavException e) {
+ e.printStackTrace();
+ }
+ return doublet.equals("") ? null : doublet;
+ }
+
+ /**
+ * Extracts the active configuration names (e.g. table schema) from the
+ * configuration data given by <code>confData</code>. Returns an array of
+ * active configuration names where the position of a specific active
+ * configuration name in the array is determined by the constants
+ * <code>INDEX_SCHEMA</code>, <code>INDEX_DB</code> etc.
+ *
+ * @param confData
+ *            Configuration data to extract active configuration names from.
+ * @return A String array with the names of active configurations. The array
+ *         is indexed by the <code>INDEX_XXX</code> constants.
+ * @throws VTDException
+ *             If something concerning the parsing and value extraction goes
+ *             wrong.
+ */
+ private static String[] getActiveConfigurations(byte[] confData)
+ throws VTDException {
+ VTDGen vg = new VTDGen();
+ vg.setDoc(confData);
+ vg.parse(true);
+ VTDNav vn = vg.getNav();
+ AutoPilot ap = new AutoPilot(vn);
+
+ String[] activeConfigurations = new String[6];
+ ap.selectXPath(XPATH_ACTIVE_PG_SCHEMA);
+ activeConfigurations[INDEX_PG_SCHEMA] = ap.evalXPathToString();
+
+ ap.selectXPath(XPATH_ACTIVE_TABLE_SCHEMA);
+ activeConfigurations[INDEX_SCHEMA] = ap.evalXPathToString();
+
+ ap.selectXPath(XPATH_ACTIVE_DB);
+ activeConfigurations[INDEX_DB] = ap.evalXPathToString();
+
+
+ ap.selectXPath(XPATH_MAX_CONNS);
+ activeConfigurations[INDEX_MAX_CONNS] = ap.evalXPathToString();
+
+ ap.selectXPath(XPATH_DATA_TABLE);
+ activeConfigurations[INDEX_DATA_TABLE] = ap.evalXPathToString();
+
+ ap.selectXPath(XPATH_DATA_SCHEMA);
+ activeConfigurations[INDEX_DATA_SCHEMA] = ap.evalXPathToString();
+
+ return activeConfigurations;
+ }
+
+ /**
+ * Retrieves XML elements (determined by the used path) from the
+ * configuration.
+ *
+ * @param confData
+ * - prepared XML
+ * @return - the retrieved element
+ * @throws IOException
+ * @throws VTDException
+ */
+ protected static byte[][] extractConfigData(byte[] confData)
+ throws IOException, VTDException {
+ // Allocate space for schema and DB connection data.
+ byte[][] configData = new byte[2][];
+ VTDGen vg = new VTDGen();
+ vg.setDoc(confData);
+ vg.parse(true);
+ VTDNav vn = vg.getNav();
+ AutoPilot ap = new AutoPilot(vn);
+
+ // Get schema definition data.
+ ap.selectXPath(XPATH_CONF_SCHEMAS);
+
+ if (ap.evalXPath() != -1) {
+ String fragment = JulieXMLTools.getFragment(vn, JulieXMLTools.CONTENT_FRAGMENT,
+ true);
+ configData[INDEX_SCHEMA] = fragment.getBytes();
+ }
+
+ // Get database connection data.
+ ap.selectXPath(XPATH_CONF_DBS);
+
+ if (ap.evalXPath() != -1) {
+ String fragment = JulieXMLTools.getFragment(vn, JulieXMLTools.CONTENT_FRAGMENT,
+ true);
+ configData[INDEX_DB] = fragment.getBytes();
+ }
+
+ return configData;
+ }
+
+ /**
+ * Accessing the Database Configuration
+ *
+ * @return - DatabaseConfig Object
+ */
+ public DBConfig getDatabaseConfig() {
+ return dbConf;
+ }
+
+ /**
+ * <p>
+ * Accessing the Field Definitions.
+ * </p>
+ * <p>
+ * The returned map consists of pairs in the form
+ * <code>(schemaName, fieldConfig)</code> where <code>schemaName</code> is
+ * the name of the table schema represented by <code>fieldConfig</code>.
+ * </p>
+ *
+ * @return - A map containing all table schemas in the default and in the
+ * user configuration.
+ */
+ public FieldConfigurationManager getFieldConfigs() {
+ return fieldConfigs;
+ }
+
+ /**
+ * @return the activeDataTable
+ */
+ public String getActiveDataTable() {
+ return activeDataTable;
+ }
+
+ public String getActiveDataSchema() {
+ return activeDataSchema;
+ }
+
+ /**
+ * @return the activeSchemaName
+ */
+ public String getActiveSchemaName() {
+ return activeSchemaName;
+ }
+
+ /**
+ * @return the mergedConfigData
+ */
+ public byte[] getMergedConfigData() {
+ return mergedConfigData;
+ }
+
+ public List<String> getTableSchemaNames() {
+ return schemaNames;
+ }
+
+}
diff --git a/src/main/java/de/julielab/xmlData/config/DBConfig.java b/src/main/java/de/julielab/costosys/configuration/DBConfig.java
similarity index 99%
rename from src/main/java/de/julielab/xmlData/config/DBConfig.java
rename to src/main/java/de/julielab/costosys/configuration/DBConfig.java
index c8e6fc0..5168137 100644
--- a/src/main/java/de/julielab/xmlData/config/DBConfig.java
+++ b/src/main/java/de/julielab/costosys/configuration/DBConfig.java
@@ -1,4 +1,4 @@
-package de.julielab.xmlData.config;
+package de.julielab.costosys.configuration;
import java.io.IOException;
diff --git a/src/main/java/de/julielab/xmlData/config/FieldConfig.java b/src/main/java/de/julielab/costosys/configuration/FieldConfig.java
similarity index 99%
rename from src/main/java/de/julielab/xmlData/config/FieldConfig.java
rename to src/main/java/de/julielab/costosys/configuration/FieldConfig.java
index a6c8532..d0b4e0a 100644
--- a/src/main/java/de/julielab/xmlData/config/FieldConfig.java
+++ b/src/main/java/de/julielab/costosys/configuration/FieldConfig.java
@@ -13,7 +13,7 @@
* Creation date: 11.03.2011
**/
-package de.julielab.xmlData.config;
+package de.julielab.costosys.configuration;
import java.util.ArrayList;
import java.util.HashMap;
@@ -22,6 +22,7 @@
import java.util.Map;
import java.util.stream.Stream;
+import de.julielab.costosys.Constants;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -41,7 +42,6 @@
import de.julielab.xml.JulieXMLConstants;
import de.julielab.xml.JulieXMLTools;
-import de.julielab.xmlData.Constants;
/**
* This class holds the definition of fields for the database table to work
diff --git a/src/main/java/de/julielab/costosys/configuration/FieldConfigurationManager.java b/src/main/java/de/julielab/costosys/configuration/FieldConfigurationManager.java
new file mode 100644
index 0000000..5719d71
--- /dev/null
+++ b/src/main/java/de/julielab/costosys/configuration/FieldConfigurationManager.java
@@ -0,0 +1,64 @@
+/**
+ * FieldConfigurationManager.java
+ *
+ * Copyright (c) 2013, JULIE Lab.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Common Public License v1.0
+ *
+ * Author: faessler
+ *
+ * Current version: 1.0
+ * Since version: 1.0
+ *
+ * Creation date: 01.02.2013
+ **/
+
+/**
+ *
+ */
+package de.julielab.costosys.configuration;
+
+import java.util.HashMap;
+
+import org.apache.commons.lang3.StringUtils;
+
+/**
+ * <p>
+ * This class is essentially a {@code HashMap<String, FieldConfig>}.
+ * </p>
+ * <p>
+ * It maps table schema names defined in the default or user provided
+ * configuration to the {@link FieldConfig} objects modeling these schemas. This
+ * class adds some minor validity checks to the default map methods.
+ * </p>
+ *
+ * @author faessler
+ *
+ */
+public class FieldConfigurationManager extends HashMap<String, FieldConfig> {
+
+ /**
+ *
+ */
+ private static final long serialVersionUID = -6516109594561720970L;
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see java.util.HashMap#get(java.lang.Object)
+ */
+ @Override
+ public FieldConfig get(Object key) {
+ if (null == key || StringUtils.isBlank(key.toString()))
+ throw new TableSchemaDoesNotExistException(
+ "The name of the table schema to fetch was null.");
+
+ FieldConfig fieldConfig = super.get(key);
+ if (null == fieldConfig) {
+ throw new TableSchemaDoesNotExistException("The requested table schema definition \"" + key
+ + "\" is not defined in the default configuration or the user provided configuration.");
+ }
+ return fieldConfig;
+ }
+
+}
diff --git a/src/main/java/de/julielab/hiddenConfig/HiddenConfig.java b/src/main/java/de/julielab/costosys/configuration/HiddenConfig.java
similarity index 95%
rename from src/main/java/de/julielab/hiddenConfig/HiddenConfig.java
rename to src/main/java/de/julielab/costosys/configuration/HiddenConfig.java
index 74a3814..cc1f8ca 100644
--- a/src/main/java/de/julielab/hiddenConfig/HiddenConfig.java
+++ b/src/main/java/de/julielab/costosys/configuration/HiddenConfig.java
@@ -1,4 +1,4 @@
-package de.julielab.hiddenConfig;
+package de.julielab.costosys.configuration;
import java.io.BufferedReader;
import java.io.File;
@@ -13,7 +13,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import de.julielab.xmlData.Constants;
+import de.julielab.costosys.Constants;
/**
* This class reads a hidden configuration file in the users home directory. If no such file exists, a new one can be
@@ -35,7 +35,7 @@ public class HiddenConfig {
private File configFile;
/**
- * Reads a hidden config file in the users home directory.
+ * Reads a hidden configuration file in the users home directory.
*/
public HiddenConfig() {
String home = System.getProperty("user.home");
@@ -101,7 +101,7 @@ else if (homeDir.exists())
}
/**
- * @return - The username in the hidden config file
+ * @return - The username in the hidden configuration file
*
*/
public String getUsername(String DBConnectionName) {
@@ -109,7 +109,7 @@ public String getUsername(String DBConnectionName) {
}
/**
- * @return - The password in the hidden config file
+ * @return - The password in the hidden configuration file
*
*/
public String getPassword(String DBConnectionName) {
diff --git a/src/main/java/de/julielab/xmlData/config/TableSchemaDoesNotExistException.java b/src/main/java/de/julielab/costosys/configuration/TableSchemaDoesNotExistException.java
similarity index 93%
rename from src/main/java/de/julielab/xmlData/config/TableSchemaDoesNotExistException.java
rename to src/main/java/de/julielab/costosys/configuration/TableSchemaDoesNotExistException.java
index 3c7390c..f806f4b 100644
--- a/src/main/java/de/julielab/xmlData/config/TableSchemaDoesNotExistException.java
+++ b/src/main/java/de/julielab/costosys/configuration/TableSchemaDoesNotExistException.java
@@ -16,7 +16,7 @@
/**
*
*/
-package de.julielab.xmlData.config;
+package de.julielab.costosys.configuration;
/**
* @author faessler
diff --git a/src/main/java/de/julielab/xmlData/dataBase/CoStoSysConnection.java b/src/main/java/de/julielab/costosys/dbconnection/CoStoSysConnection.java
similarity index 95%
rename from src/main/java/de/julielab/xmlData/dataBase/CoStoSysConnection.java
rename to src/main/java/de/julielab/costosys/dbconnection/CoStoSysConnection.java
index 5a1e57e..be3ff2c 100644
--- a/src/main/java/de/julielab/xmlData/dataBase/CoStoSysConnection.java
+++ b/src/main/java/de/julielab/costosys/dbconnection/CoStoSysConnection.java
@@ -1,6 +1,6 @@
-package de.julielab.xmlData.dataBase;
+package de.julielab.costosys.dbconnection;
-import de.julielab.xmlData.dataBase.util.CoStoSysSQLRuntimeException;
+import de.julielab.costosys.dbconnection.util.CoStoSysSQLRuntimeException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/src/main/java/de/julielab/xmlData/dataBase/ConnectionClosable.java b/src/main/java/de/julielab/costosys/dbconnection/ConnectionClosable.java
similarity index 61%
rename from src/main/java/de/julielab/xmlData/dataBase/ConnectionClosable.java
rename to src/main/java/de/julielab/costosys/dbconnection/ConnectionClosable.java
index 0a05ae6..0df0c88 100644
--- a/src/main/java/de/julielab/xmlData/dataBase/ConnectionClosable.java
+++ b/src/main/java/de/julielab/costosys/dbconnection/ConnectionClosable.java
@@ -1,4 +1,4 @@
-package de.julielab.xmlData.dataBase;
+package de.julielab.costosys.dbconnection;
public interface ConnectionClosable {
void closeConnection();
diff --git a/src/main/java/de/julielab/xmlData/dataBase/DBCIterator.java b/src/main/java/de/julielab/costosys/dbconnection/DBCIterator.java
similarity index 95%
rename from src/main/java/de/julielab/xmlData/dataBase/DBCIterator.java
rename to src/main/java/de/julielab/costosys/dbconnection/DBCIterator.java
index 20f3c62..9125738 100644
--- a/src/main/java/de/julielab/xmlData/dataBase/DBCIterator.java
+++ b/src/main/java/de/julielab/costosys/dbconnection/DBCIterator.java
@@ -16,7 +16,7 @@
/**
*
*/
-package de.julielab.xmlData.dataBase;
+package de.julielab.costosys.dbconnection;
import java.util.Iterator;
diff --git a/src/main/java/de/julielab/xmlData/dataBase/DBCThreadedIterator.java b/src/main/java/de/julielab/costosys/dbconnection/DBCThreadedIterator.java
similarity index 92%
rename from src/main/java/de/julielab/xmlData/dataBase/DBCThreadedIterator.java
rename to src/main/java/de/julielab/costosys/dbconnection/DBCThreadedIterator.java
index b2969f9..74731d6 100644
--- a/src/main/java/de/julielab/xmlData/dataBase/DBCThreadedIterator.java
+++ b/src/main/java/de/julielab/costosys/dbconnection/DBCThreadedIterator.java
@@ -1,6 +1,6 @@
-package de.julielab.xmlData.dataBase;
+package de.julielab.costosys.dbconnection;
-import de.julielab.xmlData.dataBase.util.CoStoSysSQLRuntimeException;
+import de.julielab.costosys.dbconnection.util.CoStoSysSQLRuntimeException;
import java.util.Iterator;
import java.util.List;
diff --git a/src/main/java/de/julielab/costosys/dbconnection/DataBaseConnector.java b/src/main/java/de/julielab/costosys/dbconnection/DataBaseConnector.java
new file mode 100644
index 0000000..e9f0a40
--- /dev/null
+++ b/src/main/java/de/julielab/costosys/dbconnection/DataBaseConnector.java
@@ -0,0 +1,3995 @@
+package de.julielab.costosys.dbconnection;
+
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.LoadingCache;
+import com.zaxxer.hikari.HikariConfig;
+import com.zaxxer.hikari.HikariDataSource;
+import com.zaxxer.hikari.HikariPoolMXBean;
+import de.julielab.costosys.Constants;
+import de.julielab.costosys.cli.TableNotFoundException;
+import de.julielab.costosys.configuration.*;
+import de.julielab.costosys.dbconnection.util.*;
+import de.julielab.xml.JulieXMLConstants;
+import de.julielab.xml.JulieXMLTools;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.tuple.ImmutablePair;
+import org.apache.commons.lang3.tuple.Pair;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.management.JMX;
+import javax.management.MBeanServer;
+import javax.management.ObjectName;
+import java.io.*;
+import java.lang.management.ManagementFactory;
+import java.sql.*;
+import java.util.*;
+import java.util.Map.Entry;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.ExecutionException;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import java.util.stream.Stream;
+
+/**
+ * This class creates a connection with a database and allows for convenient
+ * queries and commands.
+ * Database layout and returned columns are specified by a configuration file.
+ * The class was developed for a PostgreSQL back-end, using another database
+ * server may require modifications.
+ * Queries use up to 3 threads for higher performance and a connection pool is
+ * used for higher performance if multiple instances are deployed simultaneous.
+ *
+ * Visit
+ * <code>http://commons.apache.org/dbcp/apidocs/org/apache/commons/dbcp/package-
+ * summary.html#package_description</code> for more information about the
+ * connection pooling.
+ *
+ * @author hellrich, faessler
+ */
+public class DataBaseConnector {
+
+ public static final String DEFAULT_PIPELINE_STATE = "";
+ /**
+ * Used as a hack for the not-yet-published EMNLP-Paper. In the meantime, a more
+ * sophisticated system has been implemented (EF, 18.01.2012)
+ */
+ @Deprecated
+ public static final int META_IN_ARRAY = 2;
+ /**
+ * This is the definition of subset tables except the primary key.
+ */
+ public static final LinkedHashMap<String, String> subsetColumns;
+ /**
+ * Size of the batches used for data retrieval from the database, value is
+ * optimized for xml-clobs in postgres on 2010 hardware.
+ */
+ private static final int DEFAULT_QUERY_BATCH_SIZE = 1000;
+ /**
+ * Size of the byte buffer used for reading xml into vtd (xml parser)
+ */
+ private final static int BUFFER_SIZE = 1000;
+ private static final String DEFAULT_FIELD = "xml";
+ private static final String DEFAULT_TABLE = Constants.DEFAULT_DATA_TABLE_NAME;
+ private static final int commitBatchSize = 100;
+ private static final int RETRIEVE_MARK_LIMIT = 1000;
+ private static final int ID_SUBLIST_SIZE = 1000;
+ private static final Map<String, HikariDataSource> pools = new ConcurrentHashMap<>();
+ /**
+ * A set of field definitions read from a configuration XML file. Contains the
+ * name of each field as well as a source for the field's value.
+ */
+ // private FieldConfig fieldConfig;
+ // For import
+ private static Logger LOG = LoggerFactory.getLogger(DataBaseConnector.class);
+ private static Thread commitThread = null;
+ private static LoadingCache<Thread, List<CoStoSysConnection>> connectionCache = CacheBuilder
+ .newBuilder()
+ // The weak keys are the main reason to use the cache. It allows to garbage collect the threads
+ // that have reserved connections and did never release them. Those threads would be held in memory
+ // when we used strong references which would be a memory leak.
+ .weakKeys()
+ .build(new CacheLoader<Thread, List<CoStoSysConnection>>() {
+ @Override
+ public List<CoStoSysConnection> load(Thread thread) {
+ return new ArrayList<>();
+ }
+ });
+ private static HikariDataSource dataSource;
+
+ static {
+ subsetColumns = new LinkedHashMap<>();
+ subsetColumns.put(Constants.LOG, "text");
+ subsetColumns.put(Constants.IS_PROCESSED, "boolean DEFAULT false");
+ subsetColumns.put(Constants.IN_PROCESS, "boolean DEFAULT false");
+ subsetColumns.put(Constants.LAST_COMPONENT, "text DEFAULT '" + DEFAULT_PIPELINE_STATE + "'");
+ subsetColumns.put(Constants.HAS_ERRORS, "boolean DEFAULT false");
+ subsetColumns.put(Constants.PID, "character varying(10)");
+ subsetColumns.put(Constants.HOST_NAME, "character varying(100)");
+ subsetColumns.put(Constants.PROCESSING_TIMESTAMP, "timestamp without time zone");
+ }
+
+ /**
+ * Sometimes it is necessary to manage multiple data tables with different field
+ * schemas. fieldConfigs contains all field schema names in the configuration,
+ * mapped to the corresponding FieldConfig instance.
+ */
+ private FieldConfigurationManager fieldConfigs;
+ private DBConfig dbConfig;
+ private String activeDataSchema;
+ private String activeDataTable;
+ private String activeTableSchema;
+ private byte[] effectiveConfiguration;
+ private int queryBatchSize = DEFAULT_QUERY_BATCH_SIZE;
+ private String dbURL;
+ private String user;
+ private String password;
+ private ConfigReader config;
+
+ /**************************************************************************
+ *************************** Constructors ********************************
+ **************************************************************************/
+
+
+ public DataBaseConnector(String configPath) throws FileNotFoundException {
+ this(findConfigurationFile(configPath));
+ }
+
+ /**
+ * This class creates a connection with a database and allows for convenient
+ * queries and commands.
+ *
+ * @param configStream used to read the configuration for this connector instance
+ */
+ public DataBaseConnector(InputStream configStream) {
+ config = new ConfigReader(configStream);
+ dbConfig = config.getDatabaseConfig();
+ this.dbURL = dbConfig.getUrl();
+ this.fieldConfigs = config.getFieldConfigs();
+ this.activeDataSchema = config.getActiveDataSchema();
+ this.activeDataTable = config.getActiveDataTable().contains(".") ? config.getActiveDataTable() : this.activeDataSchema + "." + config.getActiveDataTable();
+ this.activeTableSchema = config.getActiveSchemaName();
+ this.effectiveConfiguration = config.getMergedConfigData();
+
+ if (!StringUtils.isBlank(dbConfig.getActiveDatabase()) && (StringUtils.isBlank(user) || StringUtils.isBlank(password))) {
+ HiddenConfig hc = new HiddenConfig();
+ this.user = hc.getUsername(dbConfig.getActiveDatabase());
+ this.password = hc.getPassword(dbConfig.getActiveDatabase());
+ LOG.info("Connecting to " + this.dbURL + " as " + this.user);
+ } else {
+ LOG.warn(
+ "No active database configured in configuration file or configuration file is empty or does not exist.");
+ }
+ LOG.info("Active Postgres schema: {}", dbConfig.getActivePGSchema() );
+ LOG.info("Active data Postgres schema: {}", dbConfig.getActiveDataPGSchema() );
+ }
+
+ /**
+ * This class creates a connection with a database and allows for convenient
+ * queries and commands.
+ *
+ * @param configStream used to read the configuration for this connector instance
+ * @param queryBatchSize background threads are utilized to speed up queries, this
+ * parameter determines the number of pre-fetched entries
+ */
+ public DataBaseConnector(InputStream configStream, int queryBatchSize) {
+ this(configStream);
+ this.queryBatchSize = queryBatchSize;
+ }
+
+ /**
+ * This class creates a connection with a database and allows for convenient
+ * queries and commands.
+ *
+ * @param dbUrl the url of the database
+ * @param user the username for the db
+ * @param password the password for the username
+ * @param fieldDefinition InputStream containing data of a configuration file
+ */
+ public DataBaseConnector(String dbUrl, String user, String password, String pgSchema, InputStream fieldDefinition) {
+ this(dbUrl, user, password, pgSchema, DEFAULT_QUERY_BATCH_SIZE, fieldDefinition);
+ }
+
+ public DataBaseConnector(String serverName, String dbName, String user, String password, String pgSchema,
+ InputStream fieldDefinition) {
+ this(serverName, dbName, user, password, pgSchema, DEFAULT_QUERY_BATCH_SIZE, fieldDefinition);
+ }
+
+ /**
+ * This class creates a connection with a database and allows for convenient
+ * queries and commands.
+ *
+ * @param dbUrl the url of the database
+ * @param user the username for the db
+ * @param password the password for the username
+ * @param queryBatchSize background threads are utilized to speed up queries, this
+ * parameter determines the number of pre-fetched entries
+ * @param configStream used to read the configuration for this connector instance
+ */
+ public DataBaseConnector(String dbUrl, String user, String password, String pgSchema, int queryBatchSize,
+ InputStream configStream) {
+ this(configStream, queryBatchSize);
+ // Manually entered values have priority.
+ setCredentials(dbUrl, user, password, pgSchema);
+ }
+
+ public DataBaseConnector(String serverName, String dbName, String user, String password, String pgSchema,
+ int queryBatchSize, InputStream configStream) {
+ this(configStream, queryBatchSize);
+ // Manually entered values have priority.
+ String dbUrl = null;
+ if (dbName != null && serverName != null)
+ dbUrl = "jdbc:postgresql://" + serverName + ":5432/" + dbName;
+ else {
+ if (dbName != null)
+ dbUrl = dbConfig.getUrl().replaceFirst("/[^/]+$", "/" + dbName);
+ if (serverName != null)
+ dbUrl = dbConfig.getUrl().replaceFirst("(.*//)[^/:]+(.*)", "$1" + serverName + "$2");
+ }
+
+ setCredentials(dbUrl, user, password, pgSchema);
+ }
+
+ /**
+ * This class creates a connection with a database and allows for convenient
+ * queries and commands.
+ *
+ * @param dbUrl the url of the database
+ * @param user the username for the db
+ * @param password the password for the username
+ */
+ public DataBaseConnector(String dbUrl, String user, String password) {
+ this(dbUrl, user, password, null, DEFAULT_QUERY_BATCH_SIZE, null);
+ }
+
+ private static InputStream findConfigurationFile(String configPath) throws FileNotFoundException {
+ LOG.debug("Loading DatabaseConnector configuration file from path \"{}\"", configPath);
+ File dbcConfigFile = new File(configPath);
+ InputStream is;
+ if (dbcConfigFile.exists()) {
+ LOG.debug("Found database configuration at file {}", dbcConfigFile);
+ is = new FileInputStream(configPath);
+ } else {
+ String cpResource = configPath.startsWith("/") ? configPath : "/" + configPath;
+ LOG.debug("The database configuration file could not be found as a file at {}. Trying to lookup configuration as a classpath resource at {}", dbcConfigFile, cpResource);
+ is = DataBaseConnector.class.getResourceAsStream(cpResource);
+ if (is != null)
+ LOG.debug("Found database configuration file as classpath resource at {}", cpResource);
+ }
+ if (is == null) {
+ throw new IllegalArgumentException("DatabaseConnector configuration " + configPath + " could not be found as file or a classpath resource.");
+ }
+ return is;
+ }
+
+ public ConfigReader getConfig() {
+ return config;
+ }
+
+ /**
+ * @param dbUrl
+ * @param user
+ * @param password
+ * @param pgSchema
+ */
+ private void setCredentials(String dbUrl, String user, String password, String pgSchema) {
+ if (dbUrl != null)
+ this.dbURL = dbUrl;
+ if (user != null)
+ this.user = user;
+ if (password != null)
+ this.password = password;
+ if (pgSchema != null)
+ setActivePGSchema(pgSchema);
+ if ((dbUrl != null) || (user != null) || (password != null) || (pgSchema != null))
+ LOG.info("Connecting to " + this.dbURL + " as " + this.user + " in Postgres Schema " + pgSchema);
+ }
+
+ public void setHost(String host) {
+ if (host != null) {
+ dbURL = dbURL.replaceFirst("(.*//)[^/:]+(.*)", "$1" + host + "$2");
+ LOG.debug("Setting database host to {}. DB URL is now {}", host, dbURL);
+ }
+ }
+
+ public void setPort(String port) {
+ setPort(Integer.parseInt(port));
+ }
+
+ public void setPort(Integer port) {
+ if (port != null) {
+ this.dbURL = dbURL.replaceFirst(":[0-9]+", ":" + port);
+ LOG.debug("Setting database port to {}. DB URL is now {}", port, dbURL);
+ }
+ }
+
+ public void setUser(String user) {
+ this.user = user;
+ LOG.debug("Setting database user for {} to {}", this.dbURL, user);
+ }
+
+ public void setPassword(String password) {
+ this.password = password;
+ LOG.debug("Changing database password.");
+ }
+
+ public void setMaxConnections(int num) {
+ dbConfig.setMaxConnections(num);
+ }
+
+ /**
+ * @return A Connection to the database.
+ */
+ Connection getConn() {
+
+ Connection conn = null;
+ synchronized (DataBaseConnector.class) {
+ if (null == dataSource || ((HikariDataSource) dataSource).isClosed()) {
+ LOG.debug("Setting up connection pool data source");
+ HikariConfig hikariConfig = new HikariConfig();
+ hikariConfig.setPoolName("costosys-" + System.nanoTime());
+ hikariConfig.setJdbcUrl(dbURL);
+ hikariConfig.setUsername(user);
+ hikariConfig.setPassword(password);
+ hikariConfig.setConnectionTestQuery("SELECT TRUE");
+ hikariConfig.setMaximumPoolSize(dbConfig.getMaxConnections());
+ hikariConfig.setConnectionTimeout(60000);
+ // required to be able to get the number of idle connections, see below
+ hikariConfig.setRegisterMbeans(true);
+ HikariDataSource ds = pools.compute(dbURL, (url, source) -> source == null ? new HikariDataSource(hikariConfig) : source);
+ if (ds.isClosed()) {
+ ds = new HikariDataSource(hikariConfig);
+ }
+ pools.put(dbURL, ds);
+ dataSource = ds;
+ }
+ }
+
+ try {
+ int retries = 0;
+ do {
+ try {
+ LOG.trace("Waiting for SQL connection to become free...");
+ if (LOG.isTraceEnabled()) {
+ String poolName = dataSource.getPoolName();
+ HikariPoolMXBean poolProxy = dataSource.getHikariPoolMXBean();
+ int totalConnections = poolProxy.getTotalConnections();
+ int idleConnections = poolProxy.getIdleConnections();
+ int activeConnections = poolProxy.getActiveConnections();
+ int threadsAwaitingConnection = poolProxy.getThreadsAwaitingConnection();
+ LOG.trace("Pool {} has {} total connections", poolName, totalConnections);
+ LOG.trace("Pool {} has {} idle connections left", poolName, idleConnections);
+ LOG.trace("Pool {} has {} active connections", poolName, activeConnections);
+ LOG.trace("Pool {} has {} threads awaiting a connection", poolName, threadsAwaitingConnection);
+
+ }
+ conn = dataSource.getConnection();
+ // conn = DriverManager.getConnection(fullURI);
+ LOG.trace("SQL connection obtained.");
+ Statement stm = conn.createStatement();
+ if (!schemaExists(dbConfig.getActivePGSchema(), conn))
+ createSchema(dbConfig.getActivePGSchema(), conn);
+ if (!schemaExists(dbConfig.getActiveDataPGSchema(), conn))
+ createSchema(dbConfig.getActiveDataPGSchema(), conn);
+ stm.execute(String.format("SET search_path TO %s", dbConfig.getActivePGSchema()));
+ stm.close();
+ } catch (SQLException e) {
+ LOG.warn("SQLException occurred:", e);
+ LOG.warn("Could not obtain a database connection within the timeout for thread {}. Trying again. Number of try: {}", Thread.currentThread().getName(), ++retries);
+ MBeanServer mBeanServer = ManagementFactory.getPlatformMBeanServer();
+ try {
+ String poolNameStr = ((HikariDataSource) dataSource).getPoolName();
+ ObjectName poolName = new ObjectName("com.zaxxer.hikari:type=Pool (" + poolNameStr + ")");
+ HikariPoolMXBean poolProxy = JMX.newMXBeanProxy(mBeanServer, poolName, HikariPoolMXBean.class);
+ int totalConnections = poolProxy.getTotalConnections();
+ int idleConnections = poolProxy.getIdleConnections();
+ int activeConnections = poolProxy.getActiveConnections();
+ int threadsAwaitingConnection = poolProxy.getThreadsAwaitingConnection();
+ LOG.warn("Pool {} has {} total connections", poolName, totalConnections);
+ LOG.warn("Pool {} has {} idle connections left", poolName, idleConnections);
+ LOG.warn("Pool {} has {} active connections", poolName, activeConnections);
+ LOG.warn("Pool {} has {} threads awaiting a connection", poolName, threadsAwaitingConnection);
+
+ } catch (Throwable t) {
+ LOG.warn("Could not retrieve connection pool statistics: {}. More information can be found on DEBUG level.", t.getMessage());
+ LOG.debug("Could not retrieve connection pool statistics:", t);
+ }
+ if (retries == 3)
+ throw e;
+ }
+ } while (conn == null);
+ if (retries > 0)
+ LOG.warn("It took {} retries to obtain a connection", retries);
+ } catch (SQLException e) {
+ LOG.error("Could not connect with " + dbURL);
+ throw new UnobtainableConnectionException("No database connection could be obtained from the connection " +
+ "pool. This can have one of two causes: Firstly, the application might just use all connections " +
+ "concurrently. Then, a higher number of maximum active database connections in the CoStoSys " +
+ "configuration might help. This " +
+ "number is currently set to " + config.getDatabaseConfig().getMaxConnections() + ". The other " +
+ "possibility are programming errors where connections are retrieved but not closed. Closing " +
+ "connections means to return them to the pool. It must always be made sure that connections are " +
+ "closed when they are no longer required. If database iterators are used. i.e. subclasses of " +
+ "DBCIterator, make sure to fully read the iterators. Otherwise, they might keep a permanent " +
+ "connection to the database while waiting to be consumed.", e);
+ }
+ return conn;
+ }
+
+
+ /**
+ * @return the activeDataTable
+ */
+ public String getActiveDataTable() {
+ return activeDataTable;
+ }
+
+ /**
+ * <p>
+ * Returns the effective XML configuration as a <code>byte[]</code>.
+ * </p>
+ * <p>
+ * The effective configuration consists of the default configuration and the
+ * given user configuration as well (merged by the ConfigReader in the
+ * constructor).
+ * </p>
+ *
+ * @return the effectiveConfiguration
+ */
+ public byte[] getEffectiveConfiguration() {
+ return effectiveConfiguration;
+ }
+
+ public String getActiveDataPGSchema() {
+ return activeDataSchema;
+ }
+
+ public String getActivePGSchema() {
+ return dbConfig.getActivePGSchema();
+ }
+
+ public void setActivePGSchema(String pgSchema) {
+ dbConfig.setActivePGSchema(pgSchema);
+ }
+
+ public String getActiveTableSchema() {
+ return activeTableSchema;
+ }
+
+ public void setActiveTableSchema(String schemaName) {
+ this.activeTableSchema = schemaName;
+ }
+
+ public FieldConfig getActiveTableFieldConfiguration() {
+ return fieldConfigs.get(activeTableSchema);
+ }
+
+    /**
+     * Retrieves from a subset table up to {@code RETRIEVE_MARK_LIMIT} primary keys whose rows
+     * are not marked to be in process or finished being processed and sets the rows of the
+     * retrieved primary keys as being "in process".
+     * <p>
+     * The table is locked during this transaction. Locking and marking ensure that
+     * every primary key will be returned exactly once. Remember to remove the marks
+     * if you want to use the subset again ;)
+     *
+     * @param subsetTableName - name of a table, conforming to the subset standard
+     * @param readerComponent - name of the reading component, saved in the subset table
+     * @param hostName        - will be saved in the subset table
+     * @param pid             - will be saved in the subset table
+     * @return a list of primary key value arrays which have not yet been processed
+     * @throws TableSchemaMismatchException if the table does not match the active table schema
+     * @throws TableNotFoundException       if the subset table does not exist
+     */
+    public List retrieveAndMark(String subsetTableName, String readerComponent, String hostName, String pid) throws TableSchemaMismatchException, TableNotFoundException {
+        return retrieveAndMark(subsetTableName, readerComponent, hostName, pid, RETRIEVE_MARK_LIMIT, null);
+    }
+
+    /**
+     * Retrieves primary keys from a subset table and marks them as being "in process". The
+     * table schema - and thus the form of the primary keys - is assumed to match the active
+     * table schema determined in the configuration file.
+     * <p>
+     * The table is locked during this transaction. Locking and marking ensure that
+     * every primary key will be returned exactly once. Remember to remove the marks
+     * if you want to use the subset again ;)
+     *
+     * @param subsetTableName - name of a table, conforming to the subset standard
+     * @param readerComponent - name of the reading component, saved in the subset table
+     * @param hostName        - will be saved in the subset table
+     * @param pid             - will be saved in the subset table
+     * @param limit           - batchsize for marking/retrieving
+     * @param order           - determines an ordering. Default order (which may change over
+     *                        time) when this parameter is null or empty.
+     * @return a list of primary key value arrays which have not yet been processed
+     * @see #retrieveAndMark(String, String, String, String, int, String)
+     */
+    public List retrieveAndMark(String subsetTableName, String readerComponent, String hostName, String pid,
+                                int limit, String order) throws TableSchemaMismatchException, TableNotFoundException {
+        return retrieveAndMark(subsetTableName, activeTableSchema, readerComponent, hostName, pid, limit, order);
+    }
+
+    /**
+     * Retrieves from a subset table up to {@code limit} primary keys whose rows are not
+     * marked to be in process or finished being processed and sets the rows of the
+     * retrieved primary keys as being "in process".
+     * <p>
+     * The following parameters may be set:
+     * <ul>
+     * <li>{@code limit} - sets the maximum number of primary keys retrieved</li>
+     * <li>{@code order} - determines whether to retrieve the primary keys in a particular
+     * order. Note that the default order of rows is undefined. If you need the same order
+     * in every run, you should specify some ordering as an SQL 'ORDER BY' statement. When
+     * {@code order} is not prefixed with 'ORDER BY' (case ignored), it will be inserted.</li>
+     * </ul>
+     * <p>
+     * The selected rows are locked during this transaction. Locking and marking ensure that
+     * every primary key will be returned exactly once. Remember to remove the marks if you
+     * want to use the subset again ;)
+     *
+     * @param subsetTableName - name of a table, conforming to the subset standard
+     * @param schemaName      - name of the table schema defining the primary key columns
+     * @param readerComponent - name of the reading component, saved in the subset table
+     * @param hostName        - will be saved in the subset table
+     * @param pid             - will be saved in the subset table
+     * @param limit           - batchsize for marking/retrieving
+     * @param order           - determines an ordering. Default order (which may change over
+     *                        time) when this parameter is null or empty.
+     * @return a list of primary key value arrays which have not yet been processed
+     */
+    public List retrieveAndMark(String subsetTableName, String schemaName, String readerComponent,
+                                String hostName, String pid, int limit, String order) throws TableSchemaMismatchException, TableNotFoundException {
+        checkTableDefinition(subsetTableName, schemaName);
+        List ids = new ArrayList<>(limit);
+        String sql = null;
+        Connection conn = null;
+        boolean idsRetrieved = false;
+        // Loop until the IDs could be fetched without running into a deadlock (see catch below).
+        while (!idsRetrieved) {
+            try (CoStoSysConnection costoConn = obtainOrReserveConnection()){
+                FieldConfig fieldConfig = fieldConfigs.get(schemaName);
+                conn = costoConn.getConnection();
+
+                // Manual transaction management: the marking UPDATE and its commit form one unit.
+                conn.setAutoCommit(false);
+                Statement st = conn.createStatement();
+                String orderCommand = order == null ? "" : order;
+                if (!orderCommand.equals("") && !orderCommand.trim().toUpperCase().startsWith("ORDER BY"))
+                    orderCommand = "ORDER BY " + orderCommand;
+                // Join condition matching updated rows to the selected subquery rows on all PK columns.
+                String joinStatement = Stream.of(fieldConfig.getPrimaryKey()).map(pk -> {
+                    return "t." + pk + "=subquery." + pk;
+                }).collect(Collectors.joining(" AND "));
+                String returnColumns = Stream.of(fieldConfig.getPrimaryKey()).map(pk -> {
+                    return "t." + pk;
+                }).collect(Collectors.joining(","));
+
+                // "UPDATE ... LIMIT" emulation via a subquery, following
+                // http://dba.stackexchange.com/questions/69471/postgres-update-limit-1
+                // FOR UPDATE SKIP LOCKED (PostgreSQL 9.5+) makes concurrent readers skip rows
+                // locked by other transactions instead of blocking on them.
+                // NOTE(review): readerComponent, hostName and pid are concatenated into the SQL
+                // string; they are expected to come from trusted configuration, not user input.
+                sql = "UPDATE " + subsetTableName + " AS t SET " + Constants.IN_PROCESS + " = TRUE, "
+                        + Constants.LAST_COMPONENT + " = '" + readerComponent + "', " + Constants.HOST_NAME + " = \'"
+                        + hostName + "\', " + Constants.PID + " = \'" + pid + "\'," + Constants.PROCESSING_TIMESTAMP
+                        + " = 'now' FROM (SELECT " + fieldConfig.getPrimaryKeyString() + " FROM " + subsetTableName
+                        + " WHERE " + Constants.IN_PROCESS + " = FALSE AND "
+                        + Constants.IS_PROCESSED + " = FALSE " + orderCommand + " LIMIT " + limit
+                        + " FOR UPDATE SKIP LOCKED) AS subquery WHERE " + joinStatement + " RETURNING " + returnColumns;
+                try (ResultSet res = st.executeQuery(sql)) {
+                    String[] pks = fieldConfig.getPrimaryKey();
+                    while (res.next()) {
+                        // NOTE(review): a String[] is stored in an Object[] variable and filled
+                        // via getObject(); non-String PK values would raise an
+                        // ArrayStoreException at runtime - confirm PKs are always text.
+                        Object[] values = new String[pks.length];
+                        for (int i = 0; i < pks.length; i++) {
+                            values[i] = res.getObject(i + 1);
+                        }
+                        ids.add(values);
+                    }
+                    idsRetrieved = true;
+                }
+                conn.commit();
+            } catch (SQLException e) {
+                // It is possible to run into deadlocks with the above query. Then, one process
+                // will be canceled and we get an exception. If so, just log it and try again.
+                if (!e.getMessage().contains("deadlock detected") && (e.getNextException() == null
+                        || !e.getNextException().getMessage().contains("deadlock detected"))) {
+                    LOG.error(
+                            "Error while retrieving document IDs and marking them to be in process. Sent SQL command: {}.",
+                            sql, e);
+                    SQLException nextException = e.getNextException();
+                    if (null != nextException)
+                        LOG.error("Next exception: {}", nextException);
+                    // this is not the deadlock error; break the loop
+                    break;
+                } else {
+                    LOG.debug(
+                            "Database deadlock has been detected while trying to retrieve document IDs and marking them to be processed. Tying again.");
+                    // We need to close the current, failed, transaction and start a new one for the
+                    // new try.
+                    // NOTE(review): a rollback() would be the conventional way to end a failed
+                    // transaction; committing here relies on the server having aborted it already.
+                    try {
+                        conn.commit();
+                    } catch (SQLException e1) {
+                        e1.printStackTrace();
+                    }
+                }
+            }
+        }
+        if (LOG.isTraceEnabled()) {
+            LOG.trace("The following IDs were retrieved from table {}: {}", subsetTableName, ids.stream().map(Arrays::toString).collect(Collectors.joining("; ")));
+        }
+        return ids;
+    }
+
+    /**
+     * Counts the unprocessed rows in a subset table, using the active table schema.
+     *
+     * @param subsetTableName name of the subset table
+     * @return the number of rows not yet marked as processed
+     * @see #countUnprocessed(String, String)
+     */
+    public int countUnprocessed(String subsetTableName) {
+        return countUnprocessed(subsetTableName, activeTableSchema);
+    }
+
+ /**
+ * Counts the unprocessed rows in a subset table
+ *
+ * @param subsetTableName - name of the subset table
+ * @return - number of rows
+ */
+ public int countUnprocessed(String subsetTableName, String schemaName) {
+ FieldConfig fieldConfig = fieldConfigs.get(schemaName);
+
+ int rows = 0;
+ try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+ ResultSet res = conn.getConnection().createStatement().executeQuery(
+ // as we are just looking for any unprocessed documents it
+ // is
+ // sufficient - even in the case of multiple primary key
+ // elements - to use the name of the first element
+ // in this command
+ "SELECT count(" + fieldConfig.getPrimaryKey()[0] + ")" + " FROM " + subsetTableName + " WHERE "
+ + Constants.PROCESSED + " = FALSE;");
+ if (res.next())
+ rows = res.getInt(1);
+ } catch (SQLException e) {
+ e.printStackTrace();
+ }
+ return rows;
+ }
+
+    /**
+     * Counts the rows of a data table using the active table schema, optionally restricted
+     * by a WHERE condition.
+     *
+     * @param tableName      name of the table to count rows of
+     * @param whereCondition optional SQL condition (with or without leading "WHERE"); may be null
+     * @return the number of matching rows
+     * @see #countRowsOfDataTable(String, String, String)
+     */
+    public int countRowsOfDataTable(String tableName, String whereCondition) {
+        return countRowsOfDataTable(tableName, whereCondition, activeTableSchema);
+    }
+
+ public int countRowsOfDataTable(String tableName, String whereCondition, String schemaName) {
+ FieldConfig fieldConfig = fieldConfigs.get(schemaName);
+
+ int rows = 0;
+ try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+ if (whereCondition != null) {
+ whereCondition = whereCondition.trim();
+ if (!whereCondition.toUpperCase().startsWith("WHERE"))
+ whereCondition = " WHERE " + whereCondition;
+ else
+ whereCondition = " " + whereCondition;
+ } else
+ whereCondition = "";
+
+ ResultSet res = conn.createStatement().executeQuery(
+ "SELECT count(" + fieldConfig.getPrimaryKeyString() + ")" + " FROM " + tableName + whereCondition);
+ if (res.next())
+ rows = res.getInt(1);
+ } catch (SQLException e) {
+ e.printStackTrace();
+ }
+ return rows;
+ }
+
+    /**
+     * Tests whether a subset table still has rows that are neither in process nor processed,
+     * using the active table schema.
+     *
+     * @param tableName name of the subset table
+     * @return true if at least one unfetched row exists
+     * @see #hasUnfetchedRows(String, String)
+     */
+    public boolean hasUnfetchedRows(String tableName) {
+        return hasUnfetchedRows(tableName, activeTableSchema);
+    }
+
+ /**************************************************************************
+ ******************************** Utility **********************************
+ ***************************************************************************/
+
+ public boolean hasUnfetchedRows(String tableName, String schemaName) {
+ FieldConfig fieldConfig = fieldConfigs.get(schemaName);
+
+ try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+ ResultSet res = conn.createStatement()
+ .executeQuery("SELECT " + fieldConfig.getPrimaryKeyString() + " FROM " + tableName + " WHERE "
+ + Constants.IN_PROCESS + " = FALSE AND " + Constants.IS_PROCESSED + " = FALSE LIMIT 1");
+ return res.next();
+ } catch (SQLException e) {
+ e.printStackTrace();
+ }
+ return false;
+ }
+
+    /**
+     * Deletes entries from a table.
+     *
+     * @param table name of the table
+     * @param ids   primary key arrays defining the entries to delete
+     * @see #deleteFromTableSimplePK(String, List)
+     */
+    public void deleteFromTable(String table, List ids) {
+        String sql = "DELETE FROM " + table + " WHERE ";
+        modifyTable(sql, ids);
+    }
+
+    /**
+     * Deletes entries from a table where the primary key of this table must consist
+     * of exactly one column. For deletion from tables which contain a
+     * multi-column-primary-key see {@link #deleteFromTable(String, List)}.
+     * <p>
+     * NOTE(review): the loop variable type {@code T} indicates this method is generic over
+     * the id type; the type parameter declaration appears to have been stripped from this
+     * patch rendering - verify against the original source.
+     *
+     * @param table name of the table
+     * @param ids   single-column primary key values defining the entries to delete
+     * @see #deleteFromTable(String, List)
+     */
+    public void deleteFromTableSimplePK(String table, List ids) {
+        String sql = "DELETE FROM " + table + " WHERE ";
+
+        // Convert the given list to a list of object arrays, so it fits to
+        // 'modifyTable'.
+        List objectIds = new ArrayList(ids.size());
+        for (T id : ids)
+            objectIds.add(new Object[]{id});
+        modifyTable(sql, objectIds);
+    }
+
+    /**
+     * Marks the given primary keys as successfully processed in a subset table.
+     *
+     * @param table name of the subset table to update
+     * @param ids   primary key arrays identifying the rows to mark as processed
+     */
+    public void markAsProcessed(String table, List ids) {
+        String updateCommand = String.format("UPDATE %s SET %s = TRUE WHERE ", table, Constants.PROCESSED);
+        modifyTable(updateCommand, ids);
+    }
+
+    /**
+     * Executes a given SQL command (must end with "WHERE "!) and extends the WHERE clause
+     * with the primary keys, set to the values in {@code ids}.
+     * <p>
+     * Assumes that the form of the primary keys matches the definition given in the
+     * active table schema in the configuration.
+     *
+     * @param sql a valid SQL command, ending with "WHERE "
+     * @param ids list of primary key arrays
+     * @see #modifyTable(String, List, String)
+     */
+    public void modifyTable(String sql, List ids) {
+        modifyTable(sql, ids, activeTableSchema);
+    }
+
+ /**
+ *
+ * Executes a given SQL command (must end with "WHERE "!) an extends the
+ * WHERE-clause with the primary keys, set to the values in ids.
+ *
+ *
+ * @param sql a valid SQL command, ending with "WHERE "
+ * @param ids list of primary key arrays
+ * @param schemaName name of the schema which defines the primary keys
+ */
+ public void modifyTable(String sql, List ids, String schemaName) {
+ FieldConfig fieldConfig = fieldConfigs.get(schemaName);
+
+ try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+ String where = StringUtils.join(fieldConfig.expandPKNames("%s = ?"), " AND ");
+ String fullSQL = sql + where;
+ PreparedStatement ps = null;
+ try {
+ conn.setAutoCommit(false);
+ ps = conn.prepareStatement(fullSQL);
+ } catch (SQLException e) {
+ LOG.error("Couldn't prepare: " + fullSQL);
+ e.printStackTrace();
+ }
+ String[] pks = fieldConfig.getPrimaryKey();
+ for (Object[] id : ids) {
+ for (int i = 0; i < id.length; ++i) {
+ try {
+ setPreparedStatementParameterWithType(i + 1, ps, id[i], pks[i], fieldConfig);
+ } catch (SQLException e) {
+ e.printStackTrace();
+ }
+ }
+ try {
+ ps.addBatch();
+ } catch (SQLException e) {
+ e.printStackTrace();
+ }
+ }
+ try {
+ ps.executeBatch();
+ conn.commit();
+ } catch (SQLException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+
+    /**
+     * Sets a single parameter on the prepared statement. Currently just calls
+     * {@code ps.setObject(position, value)}; the field name and field configuration are
+     * accepted for future type-specific handling but are presently unused.
+     *
+     * @param position    1-based parameter index in the prepared statement
+     * @param ps          the statement to set the parameter on
+     * @param value       the parameter value
+     * @param fieldName   name of the corresponding field (currently unused)
+     * @param fieldConfig field configuration of the table schema (currently unused)
+     * @throws SQLException if setting the parameter fails
+     */
+    private void setPreparedStatementParameterWithType(int position, PreparedStatement ps, Object value,
+                                                       String fieldName, FieldConfig fieldConfig) throws SQLException {
+        ps.setObject(position, value);
+    }
+
+    /**
+     * Returns the name of a table referenced by an SQL foreign key.
+     *
+     * @param referencingTable the name of the table for which the foreign keys shall be checked
+     * @return the name of the first referenced table or {@code null} if there is no
+     *         referenced table (i.e. the passed table name denotes a data table)
+     * @throws IllegalArgumentException    when {@code referencingTable} is {@code null}
+     * @throws CoStoSysSQLRuntimeException if reading the table meta data fails
+     */
+    public String getReferencedTable(String referencingTable) {
+        if (referencingTable == null)
+            throw new IllegalArgumentException("Name of referencing table may not be null.");
+
+        String referencedTable = null;
+        try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+            String pgSchema = dbConfig.getActivePGSchema();
+            String tableName = referencingTable;
+            // Split a schema-qualified name into its schema and table parts.
+            if (referencingTable.contains(".")) {
+                pgSchema = referencingTable.replaceFirst("\\..*$", "");
+                tableName = referencingTable.substring(referencingTable.indexOf('.') + 1);
+            }
+            // Lowercasing of the table name since case matters but postgres
+            // does lowercase on table creation.
+            ResultSet imported = conn.getMetaData().getImportedKeys("", pgSchema, tableName.toLowerCase());
+
+            if (imported.next()) {
+                // JDBC imported-keys columns 2 and 3 are the referenced table's schema and
+                // name (PKTABLE_SCHEM, PKTABLE_NAME).
+                String pkTableSchema = imported.getString(2);
+                String pkTableName = imported.getString(3);
+                referencedTable = pkTableSchema != null ? pkTableSchema + "." + pkTableName : pkTableName;
+            }
+        } catch (SQLException e1) {
+            throw new CoStoSysSQLRuntimeException(e1);
+        }
+        return referencedTable;
+    }
+
+    /**
+     * Creates a PostgreSQL schema.
+     * <p>
+     * This private method is called by the SQL {@code Connection} source, thus it takes the
+     * {@code Connection} as a parameter instead of obtaining a {@code Connection} on its own.
+     *
+     * @param schemaName the name of the PostgreSQL schema to create
+     * @param conn       connection to the database in which the schema should be created
+     */
+    private void createSchema(String schemaName, Connection conn) {
+        String sqlStr = "CREATE SCHEMA " + schemaName;
+        // try-with-resources closes the statement; the previous version leaked it.
+        try (Statement st = conn.createStatement()) {
+            st.execute(sqlStr);
+            // Log after successful creation; the old message claimed the schema "does not
+            // exist" although no existence check is performed here.
+            LOG.info("Created PostgreSQL schema \"{}\".", schemaName);
+        } catch (SQLException e) {
+            LOG.error("Could not create PostgreSQL schema with command \"{}\"", sqlStr, e);
+        }
+    }
+
+    /**
+     * Creates the PostgreSQL schema {@code schemaName} in the active database.
+     *
+     * @param schemaName the name of the PostgreSQL schema to create
+     */
+    public void createSchema(String schemaName) {
+        try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+            createSchema(schemaName, conn.getConnection());
+        }
+    }
+
+    /**
+     * Creates a new table according to the field schema definition corresponding to
+     * the active schema name determined in the configuration.
+     *
+     * @param tableName the name of the new table
+     * @param comment   a database comment stored with the new table
+     * @throws SQLException if table creation fails
+     */
+    public void createTable(String tableName, String comment) throws SQLException {
+        createTable(tableName, activeTableSchema, comment);
+    }
+
+    /**
+     * Creates a new table according to the field schema definition corresponding to
+     * the name {@code schemaName} given in the configuration file.
+     *
+     * @param tableName  the name of the new table
+     * @param schemaName name of the table schema determining the columns of the new table
+     * @param comment    a database comment stored with the new table
+     */
+    public void createTable(String tableName, String schemaName, String comment) {
+        FieldConfig fieldConfig = fieldConfigs.get(schemaName);
+
+        ArrayList columns = getTableCreationColumns(tableName, fieldConfig);
+
+        createTable(tableName, columns, comment);
+
+        // additionally, restrict the primary key to be unique
+        // (I don't know why this is necessary, but it is required
+        // for a referencing table which references several columns,
+        // that these columns own a UNIQUE constraint.)
+        if (fieldConfig.getPrimaryKey().length > 0)
+            alterTable(String.format("ADD CONSTRAINT %s_unique UNIQUE (%s)", tableName.replace(".", ""),
+                    fieldConfig.getPrimaryKeyString()), tableName);
+    }
+
+    /**
+     * Creates a new table according to the field schema definition corresponding to the
+     * name {@code schemaName} and with foreign key references to the primary key of
+     * {@code referenceTableName}.
+     * <p>
+     * The primary keys of the tables {@code tableName} and {@code referenceTableName} must
+     * be equal. The foreign key constraint is configured for ON DELETE CASCADE which means
+     * that when rows are deleted in the referenced table, they are also deleted in the
+     * table created by this method call.
+     *
+     * @param tableName          the name of the new table
+     * @param referenceTableName the table to be referenced by this table
+     * @param schemaName         the table schema determining the structure (especially the
+     *                           primary key) of the new table
+     * @param comment            a comment for the new table
+     */
+    public void createTable(String tableName, String referenceTableName, String schemaName, String comment) {
+        FieldConfig fieldConfig = fieldConfigs.get(schemaName);
+
+        ArrayList columns = getTableCreationColumns(tableName, fieldConfig);
+        columns.add(String.format("CONSTRAINT %s_fkey FOREIGN KEY (%s) REFERENCES %s ON DELETE CASCADE",
+                tableName.replace(".", ""), fieldConfig.getPrimaryKeyString(), referenceTableName));
+
+        createTable(tableName, columns, comment);
+
+        // additionally, restrict the primary key to be unique
+        // (I don't know why this is necessary, but it is required
+        // for a referencing table which references several columns,
+        // that these columns own a UNIQUE constraint.)
+        if (fieldConfig.getPrimaryKey().length > 0)
+            alterTable(String.format("ADD CONSTRAINT %s_unique UNIQUE (%s)", tableName.replace(".", ""),
+                    fieldConfig.getPrimaryKeyString()), tableName);
+    }
+
+    /**
+     * Builds the column definition strings for creating a table according to the table
+     * schema given by {@code fieldConfig}, for use with the private table-creation method.
+     *
+     * @param tableName   name of the table to create (used for the primary key constraint name)
+     * @param fieldConfig the table schema providing field names, types and the primary key
+     * @return the list of column (and constraint) definition strings
+     */
+    private ArrayList getTableCreationColumns(String tableName, FieldConfig fieldConfig) {
+        ArrayList columns = new ArrayList();
+        // One "name type" definition per schema field.
+        for (Map field : fieldConfig.getFields())
+            columns.add(field.get(JulieXMLConstants.NAME) + " " + field.get(JulieXMLConstants.TYPE));
+        // Append the primary key constraint, if the schema defines one.
+        if (fieldConfig.getPrimaryKey().length > 0)
+            columns.add(String.format("CONSTRAINT %s_pkey PRIMARY KEY (%s)", tableName.replace(".", ""),
+                    fieldConfig.getPrimaryKeyString()));
+        return columns;
+    }
+
+    /**
+     * Creates a new table with custom columns.
+     *
+     * @param tableName the name of the new table
+     * @param columns   a list of Strings, each containing name, type and constraint of a
+     *                  column, e.g. "foo integer primary key" as required for a valid sql
+     *                  command
+     * @param comment   a database comment stored with the new table
+     * @throws CoStoSysSQLRuntimeException if the SQL command fails
+     */
+    private void createTable(String tableName, List columns, String comment) {
+        // Join the column definitions directly instead of the previous
+        // append-then-replaceFirst workaround.
+        String sqlString = "CREATE TABLE " + tableName + " (" + StringUtils.join(columns, ", ") + ");";
+        try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+            // try-with-resources closes the statement; the previous version leaked it.
+            try (Statement st = conn.createStatement()) {
+                st.execute(sqlString);
+                // NOTE(review): the comment is concatenated verbatim; a single quote in it
+                // would break the statement - confirm comments are trusted input.
+                st.execute("COMMENT ON TABLE " + tableName + " IS \'" + comment + "\';");
+            } catch (SQLException e) {
+                // Log with context and rethrow; the previous stderr print and stack trace
+                // duplicated the rethrown exception.
+                LOG.error("Table creation failed with SQL: {}", sqlString, e);
+                throw new CoStoSysSQLRuntimeException(e);
+            }
+        }
+    }
+
+    /**
+     * Does the same as {@link #createSubsetTable(String, String, Integer, String, String)}
+     * with the exception that the assumed table schema is that of the active schema
+     * defined in the configuration file.
+     *
+     * @param subsetTable      name of the subset table
+     * @param supersetTable    name of the referenced table
+     * @param maxNumberRefHops the maximum number of times a foreign key reference to a data
+     *                         table may be followed
+     * @param comment          will be added to the table in the database, used to make tables
+     *                         reproducable
+     * @throws SQLException if resolving the referenced data table fails
+     */
+    public void createSubsetTable(String subsetTable, String supersetTable, Integer maxNumberRefHops, String comment)
+            throws SQLException {
+        createSubsetTable(subsetTable, supersetTable, maxNumberRefHops, comment, activeTableSchema);
+    }
+
+    /**
+     * Does the same as {@link #createSubsetTable(String, String, Integer, String, String)}
+     * with the exception that the assumed table schema is that of the active schema defined
+     * in the configuration file and the first referenced data table is used as data table.
+     *
+     * @param subsetTable   name of the subset table
+     * @param supersetTable name of the referenced table
+     * @param comment       will be added to the table in the database, used to make tables
+     *                      reproducable
+     * @throws SQLException if resolving the referenced data table fails
+     */
+    public void createSubsetTable(String subsetTable, String supersetTable, String comment) throws SQLException {
+        createSubsetTable(subsetTable, supersetTable, null, comment, activeTableSchema);
+    }
+
+    /**
+     * Creates an empty table referencing the primary key of the data table given by
+     * {@code supersetTable} or, if this is a subset table itself, the data table
+     * referenced by that table.
+     * <p>
+     * To fill the empty subset table with data, use one of the {@code init[...]} methods
+     * offered by this class.
+     * <p>
+     * Subset tables have a particular table scheme. They define a foreign key to the
+     * primary key of the referenced data table. There are the following additional columns:
+     * <ul>
+     * <li>is_in_process - boolean</li>
+     * <li>is_processed - boolean</li>
+     * <li>last_component - text</li>
+     * <li>log - text</li>
+     * <li>has_errors - boolean</li>
+     * <li>pid - character varying(10)</li>
+     * <li>host_name - character varying(100)</li>
+     * <li>processing_timestamp - timestamp without time zone</li>
+     * </ul>
+     * The subset table can be used for processing, e.g. by UIMA CollectionReaders,
+     * which store information about the processing in it.
+     * The actual data is located in the referenced table.
+     * <p>
+     * NOTE(review): several generic type parameters (e.g. on {@code List}, {@code HashSet},
+     * {@code Map}, {@code Entry}) appear to have been stripped from this patch rendering -
+     * verify against the original source.
+     *
+     * @param subsetTable    name of the subset table
+     * @param supersetTable  name of the referenced table
+     * @param posOfDataTable the position of the datatable that should be referenced; the 1st
+     *                       would be nearest data table, i.e. perhaps supersetTable
+     *                       itself. The 2nd would be the datatable referenced by the first
+     *                       data table on the reference path.
+     * @param comment        will be added to the table in the database, used to make tables
+     *                       reproducable
+     * @param schemaName     name of the table schema to work with (determined in the
+     *                       configuration file)
+     * @throws SQLException if resolving the referenced data table fails
+     */
+    public void createSubsetTable(String subsetTable, String supersetTable, Integer posOfDataTable, String comment,
+                                  String schemaName) throws SQLException {
+        FieldConfig fieldConfig = fieldConfigs.get(schemaName);
+
+        // Resolve the actual data table to reference, following subset references if needed.
+        String effectiveDataTable = getReferencedTable(supersetTable, posOfDataTable);
+
+        ArrayList columns = new ArrayList();
+        List> fields = fieldConfig.getFields();
+        HashSet pks = new HashSet(Arrays.asList(fieldConfig.getPrimaryKey()));
+        // The subset table only carries the primary key columns of the data table.
+        for (Map field : fields) {
+            String name = field.get(JulieXMLConstants.NAME);
+            if (pks.contains(name))
+                columns.add(name + " " + field.get(JulieXMLConstants.TYPE));
+        }
+
+        // Add the columns to the table.
+        for (Entry columnDefinition : subsetColumns.entrySet()) {
+            columns.add(columnDefinition.getKey() + " " + columnDefinition.getValue());
+        }
+        // Define the primary key of the table.
+        String pkStr = fieldConfig.getPrimaryKeyString();
+        columns.add(String.format("CONSTRAINT %s_pkey PRIMARY KEY (%s)", subsetTable.replace(".", ""), pkStr));
+        columns.add(String.format("CONSTRAINT %s_fkey FOREIGN KEY (%s) REFERENCES %s ON DELETE CASCADE",
+                subsetTable.replace(".", ""), pkStr, effectiveDataTable));
+        createTable(subsetTable, columns, comment);
+        // Index the processing-state columns since they are queried by every retrieveAndMark call.
+        createIndex(subsetTable, Constants.IS_PROCESSED, Constants.IN_PROCESS);
+    }
+
+    /**
+     * Creates an index named {@code <table>_idx} on the given table, covering the given
+     * columns. It is currently not possible to create a second index for the same table
+     * since the names would collide; this would require an extension of this method for
+     * different names.
+     *
+     * @param table   the table for which an index should be created
+     * @param columns the columns the index should cover
+     * @throws SQLException in case something goes wrong
+     */
+    public void createIndex(String table, String... columns) throws SQLException {
+        String indexName = table.replace(".", "") + "_idx";
+        String columnList = String.join(",", columns);
+        String createIndexSql = "CREATE INDEX " + indexName + " ON " + table + " (" + columnList + ")";
+        try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+            conn.createStatement().execute(createIndexSql);
+        }
+    }
+
+    /**
+     * Gets the - possibly indirectly - referenced table of {@code startTable} where
+     * {@code posOfDataTable} specifies the position of the desired table in the reference
+     * chain starting at {@code startTable}.
+     *
+     * @param startTable     the table to start following foreign key references from
+     * @param posOfDataTable 1-based position of the desired data table on the reference
+     *                       chain; {@code null} defaults to 1 (the nearest data table)
+     * @return the name of the data table at the requested position
+     * @throws SQLException          if a database operation fails
+     * @throws IllegalStateException if the foreign key chain contains a self-reference or a cycle
+     */
+    public String getReferencedTable(String startTable, Integer posOfDataTable) throws SQLException {
+        if (posOfDataTable == null)
+            posOfDataTable = 1;
+        int currentDatatablePosition = isDataTable(startTable) ? 1 : 0;
+        // Tables already visited on the reference chain; used to detect cycles.
+        Set<String> blacklist = new HashSet<>();
+        String effectiveDataTable = startTable;
+        String lasttable = "";
+        while (isSubsetTable(effectiveDataTable) || currentDatatablePosition < posOfDataTable) {
+            if (blacklist.contains(effectiveDataTable)) {
+                if (effectiveDataTable.equals(lasttable))
+                    throw new IllegalStateException(
+                            "The table \"" + lasttable + "\" has a foreign key on itself. This is not allowed.");
+                // Fixed typo in the message: "circel" -> "cycle".
+                throw new IllegalStateException(
+                        "Fatal error: There is a cycle in the foreign key chain. The table \"" + effectiveDataTable
+                                + "\" has been found twice when following the foreign key chain of the table \""
+                                + startTable + "\".");
+            }
+            blacklist.add(effectiveDataTable);
+            lasttable = effectiveDataTable;
+            effectiveDataTable = getNextDataTable(effectiveDataTable);
+            currentDatatablePosition++;
+        }
+        return effectiveDataTable;
+    }
+
+    /**
+     * Follows the foreign-key specifications of the given table to the referenced table. This
+     * process is repeated until a non-subset table (a table for which
+     * {@link #isSubsetTable(String)} returns {@code false}) is encountered or a table without
+     * a foreign key is found. If {@code referencingTable} has no foreign key itself,
+     * {@code null} is returned since the referenced table does not exist.
+     *
+     * @param referencingTable the table to get the next referenced data table for, possibly
+     *                         across other subsets if {@code referencingTable} denotes a subset table
+     * @return the found data table or {@code null}, if {@code referencingTable} is a data table itself
+     * @throws CoStoSysSQLRuntimeException if table meta data checking fails
+     */
+    public String getNextDataTable(String referencingTable) {
+        String referencedTable = getReferencedTable(referencingTable);
+        // Skip over intermediate subset tables until a data table (or null) is reached.
+        while (isSubsetTable(referencedTable)) {
+            referencedTable = getReferencedTable(referencedTable);
+        }
+        return referencedTable;
+    }
+
+    /**
+     * Determines the first data table on the reference path
+     * {@code referencingTable -> table1 -> table2 -> ... -> lastTable -> null}. This means
+     * that {@code referencingTable} is returned itself if it is a data table.
+     *
+     * @param referencingTable the start point of the path for which the first data table is returned
+     * @return the first data table on the foreign-key path, beginning with {@code referencingTable} itself
+     */
+    public String getNextOrThisDataTable(String referencingTable) {
+        return isDataTable(referencingTable) ? referencingTable : getNextDataTable(referencingTable);
+    }
+
+    /**
+     * Checks if the given table is a subset table.
+     * <p>
+     * A database table is identified to be a subset table if it exhibits all the column
+     * names that subsets have. Those are defined in {@link #subsetColumns}.
+     *
+     * @param table the table to check for being a subset table
+     * @return true, iff {@code table} denotes a subset table, false otherwise. The latter
+     *         case includes the {@code table} parameter being {@code null}.
+     * @throws CoStoSysSQLRuntimeException if table meta data checking fails
+     */
+    public boolean isSubsetTable(String table) {
+        if (table == null)
+            return false;
+        try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+            String pgSchema = dbConfig.getActivePGSchema();
+            String tableName = table;
+            if (table.contains(".")) {
+                pgSchema = table.replaceFirst("\\..*$", "");
+                tableName = table.substring(table.indexOf('.') + 1);
+            }
+            // Do lowercase on the table name: Case matters and postgres always
+            // lowercases the names on creation. The result set is closed via
+            // try-with-resources; the previous version leaked it.
+            try (ResultSet columns = conn.getMetaData().getColumns(null, pgSchema, tableName.toLowerCase(), null)) {
+                int numSubsetColumnsFound = 0;
+                while (columns.next()) {
+                    String columnName = columns.getString(4);
+                    if (subsetColumns.containsKey(columnName))
+                        numSubsetColumnsFound++;
+                }
+                return numSubsetColumnsFound == subsetColumns.size();
+            } catch (SQLException e) {
+                throw new CoStoSysSQLRuntimeException(e);
+            }
+        }
+    }
+
+    /**
+     * Checks whether the given table is a data table, i.e. not a subset table in the sense
+     * of {@link #isSubsetTable(String)}.
+     *
+     * @param table the table to check
+     * @return true iff {@code table} does not exhibit the subset table columns
+     */
+    public boolean isDataTable(String table) {
+        return !isSubsetTable(table);
+    }
+
+    /**
+     * Drops the given table from the database.
+     *
+     * @param table the (possibly schema-qualified) name of the table to drop
+     * @return the result of {@link Statement#execute(String)}; for a DROP TABLE command this
+     *         is {@code false} since no result set is produced
+     * @throws SQLException if the table cannot be dropped, e.g. because it does not exist
+     */
+    public boolean dropTable(String table) throws SQLException {
+        String sql = "DROP TABLE " + table;
+        // try-with-resources closes the statement; the previous version leaked it.
+        try (CoStoSysConnection conn = obtainOrReserveConnection(); Statement stmt = conn.createStatement()) {
+            return stmt.execute(sql);
+        }
+    }
+
+    /**
+     * Tests if a table exists, using the given connection.
+     *
+     * @param conn      connection used to issue the existence query
+     * @param tableName name of the table to test; may be schema-qualified, otherwise the
+     *                  active PostgreSQL schema is assumed
+     * @return true if the table exists, false otherwise (also on SQL errors, which are
+     *         printed to stderr)
+     * @throws IllegalArgumentException if {@code tableName} is null
+     */
+    public boolean tableExists(CoStoSysConnection conn, String tableName) {
+        if (tableName == null)
+            throw new IllegalArgumentException("The passed table name is null.");
+        try {
+            Statement stmt = conn.createStatement();
+            String pureTableName = tableName;
+            String schemaName = dbConfig.getActivePGSchema();
+            // Split a schema-qualified name into its schema and table parts.
+            if (tableName.contains(".")) {
+                String[] split = tableName.split("\\.");
+                schemaName = split[0];
+                pureTableName = split[1];
+            }
+            // Lowercase the names because in Postgres they are lowercased
+            // automatically when the tables are created. Thus, when not
+            // lowercasing we risk to miss the correct entry.
+            String sql = String.format(
+                    "select schemaname,tablename from pg_tables where schemaname = '%s' and tablename = '%s'",
+                    schemaName.toLowerCase(), pureTableName.toLowerCase());
+            LOG.trace("Checking whether table {} in schema {} exists.", pureTableName, schemaName);
+            LOG.trace("Sent query (names have been lowercased to match Postgres table names): {}", sql);
+            ResultSet res = stmt.executeQuery(sql);
+            return res.next();
+        } catch (SQLException e) {
+            e.printStackTrace();
+            SQLException ne = e.getNextException();
+            if (null != ne)
+                ne.printStackTrace();
+        }
+        return false;
+    }
+
+ /**
+ * Tests if a table exists.
+ *
+ * @param tableName name of the table to test
+ * @return true if the table exists, false otherwise
+ */
+ public boolean tableExists(String tableName) {
+ try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+ return tableExists(conn, tableName);
+ }
+ }
+
+ /**
+ * Tests if a schema exists.
+ *
+ * This private method is called by the SQL Connection
source, thus
+ * it takes the Connection
as a parameter instead of getting a
+ * Connection
on its own.
+ *
+ *
+ * @param schemaName name of the schema to test
+ * @param conn Connection to the database which should be checked for the
+ * existence of the schema schemaName
.
+ * @return true if the schema exists, false otherwise
+ */
+ private boolean schemaExists(String schemaName, Connection conn) {
+ try {
+ ResultSet rs = conn.createStatement()
+ .executeQuery("SELECT * FROM pg_namespace WHERE nspname = '" + schemaName + "'");
+ return rs.next();
+ } catch (SQLException e) {
+ e.printStackTrace();
+ }
+ return false;
+ }
+
+ /**
+ * Tests if a schema exists.
+ *
+ * @param schemaName name of the schema to test
+ * @return true if the schema exists, false otherwise
+ */
+ public boolean schemaExists(String schemaName) {
+ try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+ boolean exists = schemaExists(schemaName, conn.getConnection());
+
+ return exists;
+ }
+ }
+
+ /**
+ * Tests if a table contains entries.
+ *
+ * @param tableName name of the schema to test
+ * @return true if the table has entries, false otherwise
+ */
+ public boolean isEmpty(String tableName) {
+
+ String sqlStr = "SELECT * FROM " + tableName + " LIMIT 1";
+ try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+ Statement st = conn.createStatement();
+ ResultSet res = st.executeQuery(sqlStr);
+
+ return !res.next();
+ } catch (SQLException e) {
+ e.printStackTrace();
+ }
+ return false;
+ }
+
+ /**************************************************************************
+ ********************************* Data Import *****************************
+ **************************************************************************/
+
    /**
     * Convenience method for creating and initializing a random subset in one
     * step. See method references below for more information.
     *
     * @param size          number of rows the random subset should contain
     * @param subsetTable   name of the subset table to create
     * @param supersetTable name of the table to draw the rows from
     * @param comment       comment stored with the new table
     * @throws SQLException if table creation or initialization fails
     * @see #initRandomSubset(int, String, String)
     */
    public void defineRandomSubset(int size, String subsetTable, String supersetTable, String comment)
            throws SQLException {
        createSubsetTable(subsetTable, supersetTable, comment);
        initRandomSubset(size, subsetTable, supersetTable);
    }
+
+ /**
+ *
+ * Convenience method for creating and initializing a subset in one step. See
+ * method references below for more information.
+ *
+ *
+ * @param size
+ * @param subsetTable
+ * @param supersetTable
+ * @param comment
+ * @param schemaName
+ * @throws SQLException
+ * @see #initRandomSubset(int, String, String, String)
+ */
+ public void defineRandomSubset(int size, String subsetTable, String supersetTable, String comment,
+ String schemaName) throws SQLException {
+ createSubsetTable(subsetTable, supersetTable, null, schemaName, comment);
+ initRandomSubset(size, subsetTable, supersetTable, schemaName);
+ }
+
    /**
     * Convenience method for creating and initializing a subset in one step. See
     * method references below for more information.
     *
     * @param values        desired values for {@code columnToTest}
     * @param subsetTable   name of the subset table to create
     * @param supersetTable name of the table to reference
     * @param columnToTest  column checked against the given values
     * @param comment       comment stored with the new table
     * @throws SQLException if table creation or initialization fails
     * @see #initSubset(List, String, String, String)
     */
    public void defineSubset(List values, String subsetTable, String supersetTable, String columnToTest,
            String comment) throws SQLException {
        createSubsetTable(subsetTable, supersetTable, comment);
        initSubset(values, subsetTable, supersetTable, columnToTest);
    }
+
    /**
     * Convenience method for creating and initializing a subset in one step. See
     * method references below for more information.
     *
     * @param values        desired values for {@code columnToTest}
     * @param subsetTable   name of the subset table to create
     * @param supersetTable name of the table to reference
     * @param columnToTest  column checked against the given values
     * @param comment       comment stored with the new table
     * @param schemaName    name of the table schema to use
     * @throws SQLException if table creation or initialization fails
     * @see #initSubset(List, String, String, String, String)
     */
    public void defineSubset(List values, String subsetTable, String supersetTable, String columnToTest,
            String comment, String schemaName) throws SQLException {
        createSubsetTable(subsetTable, supersetTable, null, comment, schemaName);
        initSubset(values, subsetTable, supersetTable, columnToTest, schemaName);
    }
+
    /**
     * Convenience method for creating and initializing a subset in one step. See
     * method references below for more information. The subset is initialized
     * with all primary keys of the superset table.
     *
     * @param subsetTable   name of the subset table to create
     * @param supersetTable name of the table to reference
     * @param comment       comment stored with the new table
     * @throws SQLException if table creation or initialization fails
     * @see #initSubset(String, String)
     */
    public void defineSubset(String subsetTable, String supersetTable, String comment) throws SQLException {
        createSubsetTable(subsetTable, supersetTable, comment);
        initSubset(subsetTable, supersetTable);
    }
+
    /**
     * Convenience method for creating and initializing a subset in one step. See
     * method references below for more information. The subset is initialized
     * with all primary keys of the superset table.
     *
     * @param subsetTable   name of the subset table to create
     * @param supersetTable name of the table to reference
     * @param comment       comment stored with the new table
     * @param schemaName    name of the table schema to use
     * @throws SQLException if table creation or initialization fails
     * @see #initSubset(String, String, String)
     */
    public void defineSubset(String subsetTable, String supersetTable, String comment, String schemaName)
            throws SQLException {
        createSubsetTable(subsetTable, supersetTable, null, comment, schemaName);
        initSubset(subsetTable, supersetTable, schemaName);
    }
+
    /**
     * Convenience method for creating and initializing a subset in one step. See
     * method references below for more information. The subset is initialized
     * with the primary keys of all superset rows matching the given condition.
     *
     * @param subsetTable      name of the subset table to create
     * @param supersetTable    name of the table to reference
     * @param conditionToCheck SQL WHERE condition selecting the rows to copy
     * @param comment          comment stored with the new table
     * @throws SQLException if table creation or initialization fails
     * @see #initSubsetWithWhereClause(String, String, String)
     */
    public void defineSubsetWithWhereClause(String subsetTable, String supersetTable, String conditionToCheck,
            String comment) throws SQLException {
        createSubsetTable(subsetTable, supersetTable, comment);
        initSubsetWithWhereClause(subsetTable, supersetTable, conditionToCheck);
    }
+
    /**
     * Convenience method for creating and initializing a subset in one step. See
     * method references below for more information. The subset is initialized
     * with the primary keys of all superset rows matching the given condition.
     *
     * @param subsetTable      name of the subset table to create
     * @param supersetTable    name of the table to reference
     * @param conditionToCheck SQL WHERE condition selecting the rows to copy
     * @param comment          comment stored with the new table
     * @param schemaName       name of the table schema to use
     * @throws SQLException if table creation or initialization fails
     * @see #initSubsetWithWhereClause(String, String, String, String)
     */
    public void defineSubsetWithWhereClause(String subsetTable, String supersetTable, String conditionToCheck,
            String comment, String schemaName) throws SQLException {
        createSubsetTable(subsetTable, supersetTable, null, comment, schemaName);
        initSubsetWithWhereClause(subsetTable, supersetTable, conditionToCheck, schemaName);
    }
+
    /**
     * Convenience method for creating and initializing a mirror subset in one
     * step. See method references below for more information.
     *
     * @param subsetTable   name of the mirror subset table to create
     * @param supersetTable name of the data table to mirror
     * @param performUpdate whether the mirror subset should be reset on data updates
     * @param comment       comment stored with the new table
     * @throws SQLException if table creation or initialization fails
     */
    public void defineMirrorSubset(String subsetTable, String supersetTable, boolean performUpdate, String comment)
            throws SQLException {
        createSubsetTable(subsetTable, supersetTable, comment);
        initMirrorSubset(subsetTable, supersetTable, performUpdate);
    }
+
    /**
     * Convenience method for creating and initializing a mirror subset in one
     * step. See method references below for more information.
     *
     * @param subsetTable      name of the mirror subset table to create
     * @param supersetTable    name of the data table to mirror
     * @param performUpdate    whether the mirror subset should be reset on data updates
     * @param maxNumberRefHops the maximum number of times a foreign key reference to a data
     *                         table may be followed
     * @param comment          comment stored with the new table
     * @throws SQLException if table creation or initialization fails
     * @see #createSubsetTable(String, String, Integer, String)
     */
    public void defineMirrorSubset(String subsetTable, String supersetTable, boolean performUpdate,
            Integer maxNumberRefHops, String comment) throws SQLException {
        createSubsetTable(subsetTable, supersetTable, maxNumberRefHops, comment);
        initMirrorSubset(subsetTable, supersetTable, performUpdate);
    }
+
    /**
     * Convenience method for creating and initializing a mirror subset in one
     * step. See method references below for more information.
     *
     * @param subsetTable   name of the mirror subset table to create
     * @param supersetTable name of the data table to mirror
     * @param performUpdate whether the mirror subset should be reset on data updates
     * @param comment       comment stored with the new table
     * @param schemaName    name of the table schema to use
     * @throws SQLException if table creation or initialization fails
     */
    public void defineMirrorSubset(String subsetTable, String supersetTable, boolean performUpdate, String comment,
            String schemaName) throws SQLException {
        createSubsetTable(subsetTable, supersetTable, null, comment, schemaName);
        initMirrorSubset(subsetTable, supersetTable, performUpdate, schemaName);
    }
+
    /**
     * Initializes a random subset using the active table schema.
     *
     * @param size          size of the subset to create
     * @param subsetTable   name of the subset table to insert the chosen rows into
     * @param supersetTable name of the table to choose from
     * @see #initRandomSubset(int, String, String, String)
     */
    public void initRandomSubset(int size, String subsetTable, String supersetTable) {
        initRandomSubset(size, subsetTable, supersetTable, activeTableSchema);
    }
+
+ /**
+ *
+ * Selects size
rows of the given super set table randomly and
+ * inserts them into the subset table.
+ *
+ *
+ * @param size size of the subset to create
+ * @param subsetTable name of subset table to insert the chosen rows into
+ * @param superSetTable name of the table to choose from
+ * @param schemaName name of the schema to use
+ */
+ public void initRandomSubset(int size, String subsetTable, String superSetTable, String schemaName) {
+ FieldConfig fieldConfig = fieldConfigs.get(schemaName);
+ String sql = "INSERT INTO " + subsetTable + " (SELECT %s FROM " + superSetTable + " ORDER BY RANDOM() LIMIT "
+ + size + ");";
+ sql = String.format(sql, fieldConfig.getPrimaryKeyString());
+ try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+ conn.createStatement().execute(sql);
+ } catch (SQLException e) {
+ LOG.error(sql);
+ e.printStackTrace();
+ }
+ }
+
+ // TODO: could be merged with defineSubsetWithWhereClause ?
+ // EF: But here the ID list is broken down into smaller lists for which the
+ // where clause is built. defineSubsetWithWhereClause isn't capable of such
+ // things. So my vote is to let it the current way (09.01.2012).
+
    /**
     * Defines a subset by populating a subset table with primary keys from another
     * table, using the active table schema. A WHERE clause is used to control
     * which entries are copied, checking if columnToTest has the desired value.
     *
     * @param values        desired values for the columnToTest
     * @param subsetTable   name of the subset table
     * @param supersetTable name of table to reference
     * @param columnToTest  column to check for value
     * @see #initSubset(List, String, String, String, String)
     */
    public void initSubset(List values, String subsetTable, String supersetTable, String columnToTest) {
        initSubset(values, subsetTable, supersetTable, columnToTest, activeTableSchema);
    }
+
+ /**
+ * Defines a subset by populating a subset table with primary keys from another
+ * table. A WHERE clause is used to control which entries are copied, checking
+ * if columnToTest has the desired value.
+ *
+ * @param values Desired values for the columnToTest
+ * @param subsetTable name of the subset table
+ * @param supersetTable name of table to reference
+ * @param schemaName schema to use
+ * @param columnToTest column to check for value
+ */
+ public void initSubset(List values, String subsetTable, String supersetTable, String columnToTest,
+ String schemaName) {
+ FieldConfig fieldConfig = fieldConfigs.get(schemaName);
+
+ int idSize = values.size();
+
+ Statement st;
+ String sql = null;
+ try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+ st = conn.createStatement();
+ for (int i = 0; i < idSize; i += ID_SUBLIST_SIZE) {
+ List subList = i + ID_SUBLIST_SIZE - 1 < idSize ? values.subList(i, i + ID_SUBLIST_SIZE)
+ : values.subList(i, idSize);
+ if (fieldConfig.isOfStringType(columnToTest))
+ ;
+ String expansionString = columnToTest + " = '%s'";
+ String[] expandedIDs = JulieXMLTools.expandArrayEntries(subList, expansionString);
+ String where = StringUtils.join(expandedIDs, " OR ");
+ sql = "INSERT INTO " + subsetTable + " (SELECT " + fieldConfig.getPrimaryKeyString() + " FROM "
+ + supersetTable + " WHERE " + where + ")";
+ st.execute(sql);
+ }
+ } catch (SQLException e) {
+ LOG.error("SQLError while initializing subset {}. SQL query was: {}", subsetTable, sql);
+ e.printStackTrace();
+ }
+ }
+
    /**
     * Initializes {@code subsetTable} by inserting one row for each entry in
     * {@code supersetTable}, using the active table schema.
     *
     * @param subsetTable   name of the subset table
     * @param supersetTable name of the table to reference
     * @see #initSubset(String, String, String)
     */
    public void initSubset(String subsetTable, String supersetTable) {
        initSubset(subsetTable, supersetTable, activeTableSchema);
    }
+
+ /**
+ * Defines a subset by populating a subset table with all primary keys from
+ * another table.
+ *
+ * @param subsetTable name of the subset table
+ * @param supersetTable name of table to reference
+ * @param schemaName name of the schema used to determine the primary keys
+ */
+ public void initSubset(String subsetTable, String supersetTable, String schemaName) {
+ FieldConfig fieldConfig = fieldConfigs.get(schemaName);
+
+ if (fieldConfig.getPrimaryKey().length == 0)
+ throw new IllegalStateException("Not subset tables corresponding to table scheme \"" + fieldConfig.getName()
+ + "\" can be created since this scheme does not define a primary key.");
+
+
+ try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+ String pkStr = fieldConfig.getPrimaryKeyString();
+
+ Statement st = conn.createStatement();
+ String stStr = String.format("INSERT INTO %s (%s) (SELECT %s FROM %s);", subsetTable, pkStr, pkStr,
+ supersetTable);
+ st.execute(stStr);
+ } catch (SQLException e) {
+ e.printStackTrace();
+ }
+ }
+
    /**
     * Defines a subset by populating a subset table with primary keys from another
     * table, using the active table schema. All those entries are selected for
     * which the whereClause condition is true.
     *
     * @param subsetTable   name of the subset table
     * @param supersetTable name of table to reference
     * @param whereClause   condition to check by a SQL WHERE clause, e.g. 'foo > 10'
     * @see #initSubsetWithWhereClause(String, String, String, String)
     */
    public void initSubsetWithWhereClause(String subsetTable, String supersetTable, String whereClause) {
        initSubsetWithWhereClause(subsetTable, supersetTable, whereClause, activeTableSchema);
    }
+
+ /**
+ * Defines a subset by populating a subset table with primary keys from another
+ * table. All those entries are selected, for which the conditionToCheck is
+ * true.
+ *
+ * @param subsetTable name of the subset table
+ * @param supersetTable name of table to reference
+ * @param schemaName name of the schema used to determine the primary keys
+ * @param whereClause condition to check by a SQL WHERE clause, e.g. 'foo > 10'
+ */
+ public void initSubsetWithWhereClause(String subsetTable, String supersetTable, String whereClause,
+ String schemaName) {
+ FieldConfig fieldConfig = fieldConfigs.get(schemaName);
+
+
+ String stStr = null;
+ try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+ if (!whereClause.toUpperCase().startsWith("WHERE"))
+ whereClause = "WHERE " + whereClause;
+
+ String pkStr = fieldConfig.getPrimaryKeyString();
+
+ Statement st = conn.createStatement();
+ stStr = String.format("INSERT INTO %s (%s) (SELECT %s FROM %s %s);", subsetTable, pkStr, pkStr,
+ supersetTable, whereClause);
+ st.execute(stStr);
+ } catch (SQLException e) {
+ LOG.error(stStr);
+ e.printStackTrace();
+ }
+ }
+
    /**
     * Initializes a mirror subset using the active table schema.
     *
     * @param subsetTable   name of the mirror subset table
     * @param supersetTable name of the data table to mirror
     * @param performUpdate whether the mirror subset should be reset on data updates
     * @throws SQLException if subset initialization fails
     * @see #initMirrorSubset(String, String, boolean, String)
     */
    public void initMirrorSubset(String subsetTable, String supersetTable, boolean performUpdate) throws SQLException {
        initMirrorSubset(subsetTable, supersetTable, performUpdate, activeTableSchema);
    }
+
    /**
     * Defines a mirror subset, populating a subset table with primary keys from
     * another table.
     * Its name is saved into a special meta data table to enable automatic syncing
     * (changes to the superset are propagated to the mirror subset).
     *
     * @param subsetTable   name of the subset table; if not schema-qualified, the
     *                      active Postgres schema is prepended
     * @param supersetTable name of table to reference
     * @param performUpdate value stored in the "do reset" column of the mirror
     *                      collection table for this subset
     * @param schemaName    table schema used to determine the primary keys
     * @throws SQLException if creation of the mirror collection table fails
     */
    public void initMirrorSubset(String subsetTable, String supersetTable, boolean performUpdate, String schemaName)
            throws SQLException {
        // TODO if the supersetTable is actually a subset table, we must
        // determine the correct schema of the data table which will eventually
        // be referenced and create/insert into the mirrorTable there! Currently
        // the mirrorTable can be located in the wrong places.
        // table listing mirror tables
        if (!subsetTable.contains("."))
            subsetTable = dbConfig.getActivePGSchema().concat(".").concat(subsetTable);

        // Create the mirror table list if not existing.
        if (!tableExists(Constants.MIRROR_COLLECTION_NAME)) {
            List columns = new ArrayList();
            columns.add(Constants.MIRROR_COLUMN_DATA_TABLE_NAME + " text");
            columns.add(Constants.MIRROR_COLUMN_SUBSET_NAME + " text");
            columns.add(Constants.MIRROR_COLUMN_DO_RESET + " boolean DEFAULT true");
            // The subset name is the primary key: each subset mirrors exactly one
            // data table.
            columns.add(String.format("CONSTRAINT %s_pkey PRIMARY KEY (%s)", Constants.MIRROR_COLLECTION_NAME.replace(".", ""),
                    Constants.MIRROR_COLUMN_SUBSET_NAME));
            createTable(Constants.MIRROR_COLLECTION_NAME, columns,
                    "This table disposes the names of subset tables which mirror the data table " + supersetTable
                            + ". These subset tables will be updated as " + supersetTable
                            + " will obtains updates (insertions as well as deletions).");
        }
        // Create the actual subset and fill it to contain all primary key
        // values of the data table.
        initSubset(subsetTable, supersetTable, schemaName);

        // Add the new subset table to the list of mirror subset tables.
        String sql = null;
        try (CoStoSysConnection conn = obtainOrReserveConnection()) {
            Statement st = conn.createStatement();
            sql = String.format("INSERT INTO %s VALUES ('%s','%s',%b)", Constants.MIRROR_COLLECTION_NAME, supersetTable, subsetTable,
                    performUpdate);
            st.execute(sql);
        } catch (SQLException e) {
            LOG.error("Error executing SQL command: " + sql, e);
        }
    }
+
    /**
     * Looks up all mirror subsets registered for the given data table.
     *
     * @param conn      connection used for the lookup
     * @param tableName table to gather mirror subsets for; if not schema-qualified,
     *                  the active Postgres schema is prepended
     * @return a map from mirror subset table name to its "do reset" flag; null if
     *         the mirror collection table does not exist
     */
    private LinkedHashMap getMirrorSubsetNames(CoStoSysConnection conn, String tableName) {
        if (!tableExists(conn, Constants.MIRROR_COLLECTION_NAME))
            return null;

        // The mirror tables are inserted into the collecting table with schema
        // information. If the given data table is not qualified, we assume it
        // to be in the same postgres scheme as the looked-up mirror subset
        // collection table. And that is - for unqualified data tables - the
        // active postgres scheme given in the configuration file (see
        // 'getMirrorCollectionTableName' on how the mirror subset collection
        // table name is determined).
        if (!tableName.contains("."))
            tableName = dbConfig.getActivePGSchema() + "." + tableName;

        LinkedHashMap mirrorSubsetList = new LinkedHashMap<>();

        try {
            Statement stmt = conn.createStatement();
            ResultSet rs = stmt.executeQuery(String.format(
                    "SELECT %s,%s FROM %s WHERE " + Constants.MIRROR_COLUMN_DATA_TABLE_NAME + "='%s'",
                    Constants.MIRROR_COLUMN_SUBSET_NAME, Constants.MIRROR_COLUMN_DO_RESET, Constants.MIRROR_COLLECTION_NAME, tableName));
            while (rs.next()) {
                String mirrorTable = rs.getString(1);
                Boolean performUpdate = rs.getBoolean(2);
                // Only accept mirror tables that actually reference the given
                // data table; stale registry entries are skipped.
                String refDataTable = getReferencedTable(mirrorTable);
                if (refDataTable != null && refDataTable.equals(tableName))
                    mirrorSubsetList.put(mirrorTable, performUpdate);
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return mirrorSubsetList;
    }
+
    /**
     * Resets all rows of a subset: sets the is_processed, is_in_process and
     * has_errors columns to FALSE and clears the log, last component and
     * processing timestamp columns.
     *
     * @param subsetTableName name of the subset to reset
     * @see #resetSubset(String, boolean, boolean, String)
     */
    public void resetSubset(String subsetTableName) {
        resetSubset(subsetTableName, false, false, null);
    }
+
    /**
     * Sets the is_processed, is_in_process and has_errors columns of a subset to
     * FALSE (and clears the log, last component and processing timestamp columns)
     * for the rows that currently are is_in_process or is_processed.
     * <p>
     * The boolean parameter {@code whereNotProcessed} covers the use case where
     * only those rows should be reset that are in_process but not is_processed,
     * which may happen when a pipeline crashed, a document has errors or a
     * pipeline is just canceled.
     * </p>
     * <p>
     * In a similar fashion, {@code whereNoErrors} restricts the reset to rows
     * that have no errors.
     * </p>
     * <p>
     * Both boolean parameters may be combined in which case only non-processed
     * rows without errors will be reset.
     * </p>
     *
     * @param subsetTableName   name of the table to reset
     * @param whereNotProcessed if true, only rows with is_processed = FALSE are reset
     * @param whereNoErrors     if true, only rows with has_errors = FALSE are reset
     * @param lastComponent     if non-null, only rows with this last component value are reset
     */
    public void resetSubset(String subsetTableName, boolean whereNotProcessed, boolean whereNoErrors,
            String lastComponent) {
        String stStr = null;
        try (CoStoSysConnection conn = obtainOrReserveConnection()) {
            // Collect optional WHERE constraints from the boolean switches.
            List constraints = new ArrayList<>();
            if (whereNotProcessed)
                constraints.add(Constants.IS_PROCESSED + " = FALSE");
            if (whereNoErrors)
                constraints.add(Constants.HAS_ERRORS + " = FALSE");
            if (lastComponent != null)
                constraints.add(Constants.LAST_COMPONENT + " = '" + lastComponent + "'");
            Statement st = conn.createStatement();
            stStr = String.format(
                    "UPDATE %s SET %s = FALSE, %s = FALSE, %s='%s', %s = FALSE, %s = NULL, %s = NULL WHERE (%s = TRUE OR %s = TRUE)",
                    subsetTableName, Constants.IN_PROCESS, Constants.IS_PROCESSED, Constants.LAST_COMPONENT,
                    DEFAULT_PIPELINE_STATE, Constants.HAS_ERRORS, Constants.LOG, Constants.PROCESSING_TIMESTAMP,
                    Constants.IS_PROCESSED, Constants.IN_PROCESS);
            if (!constraints.isEmpty())
                stStr += " AND " + constraints.stream().collect(Collectors.joining(" AND "));
            st.execute(stStr);
        } catch (SQLException e) {
            LOG.error("Error executing SQL command: " + stStr, e);
        }
    }
+
    /**
     * Resets the subset rows identified by the given primary key values, using
     * the active table schema.
     *
     * @param conn            connection to use
     * @param subsetTableName name of the subset table to reset
     * @param pkValues        primary key values of the rows to reset
     * @return the per-row update counts of the batch update
     * @see #resetSubset(CoStoSysConnection, String, List, String)
     */
    public int[] resetSubset(CoStoSysConnection conn, String subsetTableName, List pkValues) {
        return resetSubset(conn, subsetTableName, pkValues, activeTableSchema);
    }
+
    /**
     * Executes the given SQL format string as a batched, committed update against
     * all rows identified by the given primary key values.
     * <p>
     * The format string must contain a single %s placeholder which is replaced by
     * a "pk1 = ? AND pk2 = ? ..." WHERE argument derived from the primary key of
     * the given table schema. Auto-commit is disabled during the batch run and
     * restored afterwards.
     * </p>
     *
     * @param conn            connection to use
     * @param pkValues        primary key values (one Object[] per row) to bind
     * @param sqlFormatString SQL with one %s placeholder for the WHERE argument
     * @param schemaName      table schema defining the primary key columns
     * @return the collected update counts of all executed batches; may be shorter
     *         than pkValues if an SQLException interrupted the run (the error is
     *         logged, not rethrown)
     */
    public int[] performBatchUpdate(CoStoSysConnection conn, List pkValues, String sqlFormatString, String schemaName) {

        FieldConfig fieldConfig = fieldConfigs.get(schemaName);

        String stStr = null;
        List resultList = new ArrayList<>();
        boolean autoCommit = true;
        try {
            // Remember the auto-commit state so it can be restored in finally.
            autoCommit = conn.getAutoCommit();
            conn.setAutoCommit(false);
            String whereArgument = StringUtils.join(fieldConfig.expandPKNames("%s = ?"), " AND ");
            stStr = String.format(sqlFormatString, whereArgument);

            LOG.trace("Performing batch update with SQL command: {}", stStr);

            PreparedStatement ps = conn.prepareStatement(stStr);
            int i = 0;
            for (Object[] id : pkValues) {
                // Bind each primary key component with its configured type.
                for (int j = 0; j < id.length; ++j) {
                    setPreparedStatementParameterWithType(j + 1, ps, id[j], fieldConfig.getPrimaryKey()[j],
                            fieldConfig);
                }
                ps.addBatch();

                // Flush a full batch. NOTE: the counter is checked before being
                // incremented, so the flush happens one row after the counter
                // reaches commitBatchSize.
                if (i >= commitBatchSize) {
                    int[] results = ps.executeBatch();
                    for (int result : results)
                        resultList.add(result);
                    conn.commit();
                    ps.clearBatch();
                    i = 0;
                }
                ++i;
            }
            // Execute and commit the remaining partial batch.
            int[] results = ps.executeBatch();
            for (int result : results)
                resultList.add(result);
            conn.commit();

        } catch (SQLException e) {
            LOG.error("Error executing SQL command: " + stStr, e);
        } finally {
            try {
                conn.setAutoCommit(autoCommit);
            } catch (SQLException e) {
                LOG.error("Could not set auto commit to its original value", e);
            }
        }
        // Unbox the collected update counts into the int[] return value.
        int[] ret = new int[resultList.size()];
        for (int i = 0; i < ret.length; i++)
            ret[i] = resultList.get(i);
        return ret;
    }
+
+ /**
+ * Sets the values in the is_processed
and
+ * is_in_process
rows of a subset to FALSE
. Only
+ * resets the subset table rows where the primary key equals one of the entries
+ * in pkValues
.
+ *
+ * @param subsetTableName - name of the table to reset
+ * @param pkValues - list of primary keys
+ * @return
+ */
+ public int[] resetSubset(CoStoSysConnection conn, String subsetTableName, List pkValues, String schemaName) {
+ // We intentionally do not check whether the rows are already reset
+ // because we want the only reason for the update to not affect a
+ // row to be that the row doesn't exist.
+ // The original where was: 'where (is_processed = TRUE OR
+ // is_in_process = TRUE) AND %s'
+ String updateFormatString = "UPDATE " + subsetTableName + " SET " + Constants.IS_PROCESSED + "=FALSE, "
+ + Constants.IN_PROCESS + "= FALSE, " + Constants.LAST_COMPONENT + "='" + DEFAULT_PIPELINE_STATE +"," + Constants.HOST_NAME + "=NULL"
+ + "' WHERE %s";
+ return performBatchUpdate(conn, pkValues, updateFormatString, schemaName);
+ }
+
+ public int[] determineExistingSubsetRows(CoStoSysConnection conn, String subsetTableName, List pkValues, String schemaName) {
+ String updateFormatString = "UPDATE " + subsetTableName + " SET has_errors = has_errors " + "where %s";
+ return performBatchUpdate(conn, pkValues, updateFormatString, schemaName);
+ }
+
    /**
     * Imports XMLs into a table using the active table schema.
     * <p>
     * NOTE(review): this overload takes (identifier, tableName) in the opposite
     * order of {@link #importFromXML(Iterable, String, String, String)}; the
     * delegation below swaps the arguments accordingly.
     * </p>
     *
     * @param xmls       an Iterable over XMLs as byte[]
     * @param identifier used for error messages
     * @param tableName  name of the table to import into
     * @see #importFromXML(Iterable, String, String, String)
     */
    public void importFromXML(Iterable xmls, String identifier, String tableName) {
        importFromXML(xmls, tableName, identifier, activeTableSchema);
    }
+
    /**
     * Imports XMLs into a table.
     *
     * @param xmls       an Iterable over XMLs as byte[]
     * @param tableName  name of the table to import into
     * @param identifier used for error messages
     * @param schemaName table schema defining the XPath extraction of the fields
     */
    public void importFromXML(Iterable xmls, String tableName, String identifier, String schemaName) {
        FieldConfig fieldConfig = fieldConfigs.get(schemaName);

        // Each XML document is turned into a row iterator via the schema's
        // forEach XPath and field definitions, then imported.
        for (byte[] xml : xmls) {
            Iterator> it = JulieXMLTools.constructRowIterator(xml, BUFFER_SIZE,
                    fieldConfig.getForEachXPath(), fieldConfig.getFields(), identifier);
            importFromRowIterator(it, tableName);
        }
    }
+
    /**
     * Import new medline XMLs in a existing table from an XML file or a directory
     * of XML files, using the active table schema. The XML must be in MEDLINE XML
     * format and can additionally be (G)Zipped.
     *
     * @param fileStr   path to file or directory of (G)Zipped MEDLINE XML file(s)
     * @param tableName name of the target table
     * @see #importFromXMLFile(String, String, String)
     */
    public void importFromXMLFile(String fileStr, String tableName) {
        importFromXMLFile(fileStr, tableName, activeTableSchema);
    }
+
+ /**
+ * Import new medline XMLs in a existing table from an XML file or a directory
+ * of XML files. The XML must be in MEDLINE XML format and can additionally be
+ * (G)Zipped.
+ *
+ * @param fileStr - path to file or directory of (G)Zipped MEDLINE XML file(s)
+ * @param tableName - name of the target table
+ * @param schemaName the table schema to use for the import
+ */
+ public void importFromXMLFile(String fileStr, String tableName, String schemaName) {
+ LOG.info("Starting import...");
+
+ FieldConfig fieldConfig = fieldConfigs.get(schemaName);
+
+ String[] fileNames;
+ File fileOrDir = new File(fileStr);
+ if (!fileOrDir.isDirectory()) {
+ fileNames = new String[1];
+ fileNames[0] = fileStr;
+ } else {
+ fileNames = fileOrDir.list(new FilenameFilter() {
+ public boolean accept(File arg0, String arg1) {
+ // TODO write accepted file extensions into configuration
+ return arg1.endsWith(".zip") || arg1.endsWith(".gz") || arg1.endsWith(".xml");
+ }
+ });
+ }
+ // medline files are sorted chronological
+ Arrays.sort(fileNames);
+ XMLPreparer xp = new XMLPreparer(fileOrDir, fieldConfig);
+ for (String fileName : fileNames) {
+ LOG.info("Importing " + fileName);
+ Iterator> it = xp.prepare(fileName);
+ importFromRowIterator(it, tableName, true, schemaName);
+ }
+ }
+
    /**
     * Updates an existing table from an XML file or directory, using the active
     * table schema.
     *
     * @param fileStr   file or directory containing new or updated entries
     * @param tableName table to update
     * @see #updateFromXML(String, String, String)
     */
    public void updateFromXML(String fileStr, String tableName) {
        updateFromXML(fileStr, tableName, activeTableSchema);
    }
+
    /**
     * Updates an existing database. If the file contains new entries those are
     * inserted, otherwise the table is updated to the version in the file.
     *
     * @param fileStr    file or directory containing new or updated entries
     * @param tableName  table to update
     * @param schemaName table schema defining primary keys and field extraction
     */
    public void updateFromXML(String fileStr, String tableName, String schemaName) {
        FieldConfig fieldConfig = fieldConfigs.get(schemaName);

        // TODO deprecated way of determining the primary key fields?! Make sure
        // and use appropriate method of FieldConfig.
        List pks = new ArrayList();
        List> fields = fieldConfig.getFields();
        for (Map field : fields)
            if (field.containsKey("primaryKey"))
                if (field.get("primaryKey").equals(true))
                    pks.add(field.get("name"));
        LOG.info("Starting update...");

        String[] fileNames;
        File fileOrDir = new File(fileStr);
        if (!fileOrDir.isDirectory()) {
            fileNames = new String[1];
            fileNames[0] = fileStr;
        } else {
            fileNames = fileOrDir.list(new FilenameFilter() {
                public boolean accept(File arg0, String arg1) {
                    // TODO write accepted file extensions in configuration
                    // file
                    return arg1.endsWith(".zip") || arg1.endsWith(".gz") || arg1.endsWith(".xml");
                }
            });
        }

        // in medline, the files are ordered chronological
        Arrays.sort(fileNames);
        XMLPreparer xp = new XMLPreparer(fileOrDir, fieldConfig);
        for (String fileName : fileNames) {
            LOG.info("Updating from " + fileName);
            Iterator> fileIt = xp.prepare(fileName);
            updateFromRowIterator(fileIt, tableName, true, schemaName);
        }
    }
+
    /**
     * Imports rows into a table using the active table schema, committing in
     * batches.
     *
     * @param it        an Iterator yielding rows to insert into the database
     * @param tableName name of the table to import into
     * @see #importFromRowIterator(Iterator, String, boolean, String)
     */
    public void importFromRowIterator(Iterator> it, String tableName) {
        importFromRowIterator(it, tableName, true, activeTableSchema);
    }
+
    /**
     * Imports rows into a table using the given table schema, committing in
     * batches.
     *
     * @param it          an Iterator yielding rows to insert into the database
     * @param tableName   name of the table to import into
     * @param tableSchema name of the table schema to use
     * @see #importFromRowIterator(Iterator, String, boolean, String)
     */
    public void importFromRowIterator(Iterator> it, String tableName, String tableSchema) {
        importFromRowIterator(it, tableName, true, tableSchema);
    }
+
+ /**
+ * Internal method to import into an existing table
+ *
+ * @param it - an Iterator, yielding rows to insert into the database
+ * @param tableName - the updated table
+ * @param commit - if true , the inserted data will be committed in batches
+ * within this method; no commits will happen otherwise.
+ * @param schemaName the name of the table schema corresponding to the data table
+ */
+ public void importFromRowIterator(Iterator> it, String tableName,
+ boolean commit, String schemaName) {
+ // Fast return to spare some unnecessary communication with the
+ // database.
+ if (!it.hasNext())
+ return;
+
+ FieldConfig fieldConfig = fieldConfigs.get(schemaName);
+
+ String dataImportStmtString = constructImportStatementString(tableName, fieldConfig);
+ String mirrorUpdateStmtString = constructMirrorInsertStatementString(fieldConfig);
+
+ boolean wasAutoCommit = true;
+
+ try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+ wasAutoCommit = conn.getAutoCommit();
+ // Get the list of mirror subsets in which all new primary keys must
+ // be inserted as well.
+ LinkedHashMap mirrorNames = getMirrorSubsetNames(conn, tableName);
+
+ conn.setAutoCommit(false);
+ PreparedStatement psDataImport = conn.prepareStatement(dataImportStmtString);
+
+ List mirrorStatements = null;
+ if (mirrorNames != null) {
+ mirrorStatements = new ArrayList<>();
+ for (String mirror : mirrorNames.keySet()) {
+ mirrorStatements.add(conn.prepareStatement(String.format(mirrorUpdateStmtString, mirror)));
+ }
+ }
+ List> fields = fieldConfig.getFields();
+ int i = 0;
+ while (it.hasNext()) {
+ Map row = it.next();
+ for (int j = 0; j < fields.size(); j++) {
+ Map field = fields.get(j);
+ String fieldName = field.get(JulieXMLConstants.NAME);
+ setPreparedStatementParameterWithType(j + 1, psDataImport, row.get(fieldName), fieldName,
+ fieldConfig);
+ }
+ psDataImport.addBatch();
+
+ if (mirrorStatements != null) {
+ for (PreparedStatement ps : mirrorStatements) {
+ for (int j = 0; j < fieldConfig.getPrimaryKey().length; j++) {
+ String fieldName = fieldConfig.getPrimaryKey()[j];
+ setPreparedStatementParameterWithType(j + 1, ps, row.get(fieldName), fieldName,
+ fieldConfig);
+ }
+ ps.addBatch();
+ }
+ }
+
+ ++i;
+ if (i >= commitBatchSize) {
+ psDataImport.executeBatch();
+ if (mirrorStatements != null)
+ for (PreparedStatement ps : mirrorStatements)
+ ps.executeBatch();
+ // NOTE If a fast return from a commit is required, rather
+ // use
+ // Postgres asynchroneous commit
+ // (http://www.postgresql.org/docs/9.1/static/wal-async-commit.html)
+ // commit(conn);
+ if (commit)
+ conn.commit();
+ psDataImport = conn.prepareStatement(dataImportStmtString);
+ i = 0;
+ }
+ }
+ if (i > 0) {
+ psDataImport.executeBatch();
+ if (commit)
+ conn.commit();
+ if (mirrorStatements != null)
+ for (PreparedStatement ps : mirrorStatements)
+ ps.executeBatch();
+ // NOTE If a fast return from a commit is required, rather
+ // use
+ // Postgres asynchroneous commit
+ // (http://www.postgresql.org/docs/9.1/static/wal-async-commit.html)
+ // commit(conn);
+ if (commit)
+ conn.commit();
+ conn.setAutoCommit(wasAutoCommit);
+ }
+ } catch (SQLException e) {
+ LOG.error("SQLException while trying to insert: ", e);
+ SQLException nextException = e.getNextException();
+ if (nextException != null) {
+ LOG.error("Next exception: ", nextException);
+ }
+ throw new CoStoSysSQLRuntimeException(e);
+ } finally {
+ try {
+ if (commitThread != null)
+ commitThread.join();
+ } catch (InterruptedException e) {
+ throw new CoStoSysRuntimeException(e);
+ }
+ }
+ }
+
    /**
     * <p>
     * Updates a table with the entries yielded by the iterator. Entries that are
     * not yet in the table will be inserted instead.
     * </p>
     * <p>
     * The input rows are expected to fit the active table schema.
     * </p>
     *
     * @param it        an Iterator yielding new or updated entries
     * @param tableName the updated table
     * @see #updateFromRowIterator(Iterator, String, boolean, String)
     */
    public void updateFromRowIterator(Iterator<Map<String, Object>> it, String tableName) {
        updateFromRowIterator(it, tableName, true, activeTableSchema);
    }
+
+ /**
+ *
+ * Updates a table with the entries yielded by the iterator. If the entries is
+ * not yet in the table, it will be inserted instead.
+ *
+ *
+ * The input rows are expected to fit the table schema schemaName
.
+ *
+ *
+ * @param it
+ * - an Iterator, yielding new or updated entries.
+ * @param tableName
+ * - the updated table
+ */
+
+ /**
+ *
+ * Updates a table with the entries yielded by the iterator. If the entries is
+ * not yet in the table, it will be inserted instead.
+ *
+ *
+ * The input rows are expected to fit the table schema schemaName
.
+ *
+ * @param it - an Iterator, yielding new or updated entries.
+ * @param tableName - the updated table
+ * @param commit - if true , the updated data will be committed in batches
+ * within this method; nothing will be commit otherwise.
+ * @param schemaName the name of the table schema corresponding to the updated data
+ * table
+ */
+ public void updateFromRowIterator(Iterator> it, String tableName,
+ boolean commit, String schemaName) {
+ // Fast return to avoid unnecessary communication with the database.
+ if (!it.hasNext())
+ return;
+
+ FieldConfig fieldConfig = fieldConfigs.get(schemaName);
+
+ String statementString = constructUpdateStatementString(tableName, fieldConfig);
+ String mirrorInsertStmtString = constructMirrorInsertStatementString(fieldConfig);
+
+ // this is just a default value in case the next line throws an exception
+ boolean wasAutoCommit = true;
+ try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+ wasAutoCommit = conn.getAutoCommit();
+ LOG.trace("Retrieving mirror subsets of table {}", tableName);
+ LinkedHashMap mirrorNames = getMirrorSubsetNames(conn, tableName);
+
+ List mirrorStatements = null;
+ if (mirrorNames != null) {
+ mirrorStatements = new ArrayList<>();
+ for (String mirror : mirrorNames.keySet()) {
+ mirrorStatements.add(conn.prepareStatement(String.format(mirrorInsertStmtString, mirror)));
+ }
+ }
+
+ final int sliceSize = 10000;
+ LOG.trace("Reading update data slice up to {} documents. Within this slice, duplicate document IDs will be handled by only taking the last document into account.", sliceSize);
+
+ String[] primaryKey = fieldConfig.getPrimaryKey();
+ // This is an outer loop to help us cut the documents we get from the iterator in slices. This is very
+ // useful or even required when reading large archives from a single iterator.
+ while (it.hasNext()) {
+ // This map will assemble for each primary key only the NEWEST (in
+ // XML the latest in Medline) row. Its size is an approximation of
+ // Medline blob XML files.
+ // TODO we should actually check for the PMID version and take the highest
+ Map> rowsByPk = new HashMap<>();
+ while (it.hasNext() && rowsByPk.size() < sliceSize) {
+ Map row = it.next();
+ StringBuilder rowPrimaryKey = new StringBuilder();
+ for (int j = 0; j < primaryKey.length; j++) {
+ String keyFieldName = primaryKey[j];
+ Object key = row.get(keyFieldName);
+ rowPrimaryKey.append(key);
+
+ }
+ String pk = rowPrimaryKey.toString();
+ rowsByPk.put(pk, row);
+ }
+
+ PreparedStatement ps = conn.prepareStatement(statementString);
+ List> fields = fieldConfig.getFields();
+ List> cache = new ArrayList<>(commitBatchSize);
+ int i = 0;
+ for (Map row : rowsByPk.values()) {
+
+ for (int j = 0; j < fields.size() + primaryKey.length; j++) {
+ if (j < fields.size()) {
+ Map field = fields.get(j);
+ String fieldName = field.get(JulieXMLConstants.NAME);
+ setPreparedStatementParameterWithType(j + 1, ps, row.get(fieldName), null, null);
+ } else {
+ String key = primaryKey[j - fields.size()];
+ Object keyValue = row.get(key);
+ setPreparedStatementParameterWithType(j + 1, ps, keyValue, null, null);
+ }
+ }
+ ps.addBatch();
+ cache.add(row);
+
+ ++i;
+ if (i >= commitBatchSize) {
+ LOG.trace("Committing batch of size {}", i);
+ executeAndCommitUpdate(tableName, conn, commit, schemaName, fieldConfig, mirrorNames,
+ mirrorStatements, ps, cache);
+ cache.clear();
+ i = 0;
+ }
+ }
+ if (i > 0) {
+ LOG.trace("Committing last batch of size {}", i);
+ executeAndCommitUpdate(tableName, conn, commit, schemaName, fieldConfig, mirrorNames,
+ mirrorStatements, ps, cache);
+ }
+ conn.setAutoCommit(wasAutoCommit);
+ }
+ } catch (SQLException e) {
+ LOG.error(
+ "SQL error while updating table {}. Database configuration is: {}. Table schema configuration is: {}",
+ tableName, dbConfig, fieldConfig, e);
+ SQLException nextException = e.getNextException();
+ if (null != nextException) {
+ LOG.error("Next exception was: ", nextException);
+ }
+ throw new CoStoSysSQLRuntimeException(e);
+ } finally {
+ try {
+ if (commitThread != null)
+ commitThread.join();
+
+ } catch (InterruptedException e) {
+ throw new CoStoSysRuntimeException(e);
+ }
+ }
+ }
+
+ /**
+ * Performs the actual update in the database. Additionally manages the
+ * appropriate reset of rows in mirror subsets and the addition of missing rows
+ * in mirror subsets.
+ *
+ * @param tableName
+ * @param externalConn
+ * @param commit
+ * @param schemaName
+ * @param fieldConfig
+ * @param mirrorNames
+ * @param mirrorStatements
+ * @param ps
+ * @param cache
+ * @throws SQLException
+ */
+ private void executeAndCommitUpdate(String tableName, CoStoSysConnection externalConn, boolean commit, String schemaName,
+ FieldConfig fieldConfig, LinkedHashMap mirrorNames,
+ List mirrorStatements, PreparedStatement ps, List> cache)
+ throws SQLException {
+ boolean wasAutoCommit = externalConn.getAutoCommit();
+ try {
+ externalConn.setAutoCommit(false);
+ int[] returned = ps.executeBatch();
+
+ List> toInsert = new ArrayList<>(commitBatchSize);
+ List> toResetRows = new ArrayList<>(commitBatchSize);
+ List toResetPKs = new ArrayList<>();
+
+ fillUpdateLists(cache, returned, toInsert, toResetPKs, toResetRows, fieldConfig);
+ importFromRowIterator(toInsert.iterator(), tableName, commit, schemaName);
+ // Do a commit to end the transaction. This is sometimes even necessary
+ // because following transactions would be blocked otherwise.
+ LOG.trace("Committing updates to the data table.");
+ externalConn.commit();
+ if (mirrorNames != null) {
+ LOG.trace("Applying updates to mirror subsets:");
+ List> toInsertMirror = new ArrayList<>(commitBatchSize);
+ Iterator mirrorNamesIt = mirrorNames.keySet().iterator();
+ Iterator mirrorStatementsIt = mirrorStatements.iterator();
+ for (int j = 0; j < mirrorNames.size(); j++) {
+ String mirrorName = mirrorNamesIt.next();
+ LOG.trace("Applying to mirror subset \"{}\"", mirrorName);
+ // The mirrorNames hashmap has as values booleans telling
+ // whether to reset a mirror table or not. If not, we still want
+ // to know whether there are any missing rows and insert them.
+ if (mirrorNames.get(mirrorName)) {
+ LOG.trace("Resetting updated rows.");
+ returned = resetSubset(externalConn, mirrorName, toResetPKs, schemaName);
+ } else {
+ LOG.trace("Updates rows are NOT reset.");
+ returned = determineExistingSubsetRows(externalConn, mirrorName, toResetPKs, schemaName);
+ }
+ // Possibly some update documents don't even exist
+ // in a mirror subset. This shouldn't happen of
+ // course, but it might due to errors. This allows
+ // to repair the error by an update instead of
+ // deleting the missing data from the data table and
+ // re-import it.
+ fillUpdateLists(toResetRows, returned, toInsertMirror, null, null, fieldConfig);
+ if (toInsertMirror.size() > 0) {
+ LOG.trace("{} updated rows where not found in this mirror subset. They will be added");
+ // The mirror insert statements are a parallel list
+ // to mirrorNames, thus the jth mirrorName belong to
+ // the jth insert statement.
+ PreparedStatement mirrorPS = mirrorStatementsIt.next();
+ for (Map missingMirrorRow : toInsertMirror) {
+ for (int k = 0; k < fieldConfig.getPrimaryKey().length; k++) {
+ String fieldName = fieldConfig.getPrimaryKey()[k];
+ setPreparedStatementParameterWithType(k + 1, mirrorPS, missingMirrorRow.get(fieldName),
+ fieldName, fieldConfig);
+ }
+ mirrorPS.addBatch();
+ }
+ mirrorPS.executeBatch();
+ toInsertMirror.clear();
+ } else {
+ LOG.trace("All updated rows exist in the mirror subset.");
+ }
+ }
+ }
+
+ if (commit) {
+ LOG.trace("Committing updates.");
+ externalConn.commit();
+ }
+ } finally {
+ externalConn.setAutoCommit(wasAutoCommit);
+ }
+ }
+
+ /**
+ *
+ * Prepares lists of documents to insert into a table and primary keys for which
+ * mirror subsets must be reseted because the respective documents in the data
+ * table have been updated. The preparation happens basing on the return value
+ * of an SQL operation trying to operate on a set of documents, e.g. updating
+ * them. A batch UPDATE command, for instance, returns an int[] where for each
+ * batch item 0 indicates non-success (could not be updated, presumably because
+ * the primary key in the update command does not exist) and 1 indicates
+ * success.
+ * Successful updated documents must be reseted in the mirror subsets, documents
+ * that could not be updated (and thus don't exist) must be inserted.
+ *
+ *
+ * @param cache Input: The list of rows for which the original SQL command was
+ * issued that returned the values in returned . Must be
+ * parallel to returned .
+ * @param returned Input: The return values of the SQL command issued on base of the
+ * rows contained in cache .
+ * @param toInsert Output: Rows from cache filtered by "corresponding value
+ * in returned was <= 0 (non-success)".
+ * @param toResetPKs Output: Primary keys from cache rows for which
+ * returned holds a value >0 (e.g. successful update).
+ * @param toResetRows Output, may be null: The rows from cache for which
+ * returned holds a value >0.
+ * @param fieldConfig Input: Field configuration to determine the correct primary key.
+ */
+ private void fillUpdateLists(List> cache, int[] returned, List> toInsert,
+ List toResetPKs, List> toResetRows, FieldConfig fieldConfig) {
+ for (int j = 0; j < returned.length; ++j) {
+ Map newRow = cache.get(j);
+ if (returned[j] <= 0) {
+ toInsert.add(newRow);
+ } else {
+ if (null != toResetPKs) {
+ Object[] pkValues = new Object[fieldConfig.getPrimaryKey().length];
+ for (int k = 0; k < pkValues.length; k++) {
+ String pkColumn = fieldConfig.getPrimaryKey()[k];
+ pkValues[k] = newRow.get(pkColumn);
+ }
+ toResetPKs.add(pkValues);
+ }
+ if (null != toResetRows)
+ toResetRows.add(newRow);
+ }
+ }
+ }
+
+ /**
+ * Creates an SQL-template, usable in prepared statements which add new values
+ * into a table
+ *
+ * @param fieldConfig - used to get the primary key, as the template must contain it
+ * @return - an SQL string for inserting, containing a '?' for every primary key
+ * and a %s for the table name
+ */
+ private String constructMirrorInsertStatementString(FieldConfig fieldConfig) {
+ String stmtTemplate = "INSERT INTO %s (%s) VALUES (%s)";
+ String pkStr = fieldConfig.getPrimaryKeyString();
+ String[] wildCards = new String[fieldConfig.getPrimaryKey().length];
+ for (int i = 0; i < wildCards.length; i++)
+ wildCards[i] = "?";
+ String wildCardStr = StringUtils.join(wildCards, ",");
+ return String.format(stmtTemplate, "%s", pkStr, wildCardStr);
+ }
+
+ /**
+ * Constructs an SQL prepared statement for import of data rows into the
+ * database table tableName
according to the field schema
+ * definition.
+ *
+ * Example:
+ *
+ * If the field schema contains two rows 'pmid' and 'xml', the statement
+ * expressions expects all these rows to be filled. The resulting String will be
+ *
+ *
INSERT INTO (pmid,xml) VALUES (?,?)
+ *
+ * @param tableName Name of the database table to import data into.
+ * @param fieldDefinition A {@link FieldConfig} object determining the rows to be imported.
+ * @return An SQL prepared statement string for import of data into the table.
+ */
+ private String constructImportStatementString(String tableName, FieldConfig fieldDefinition) {
+ String stmtTemplate = "INSERT INTO %s (%s) VALUES (%s)";
+ List> fields = fieldDefinition.getFields();
+ StringBuilder columnsStrBuilder = new StringBuilder();
+ StringBuilder valuesStrBuilder = new StringBuilder();
+ for (int i = 0; i < fields.size(); ++i) {
+ columnsStrBuilder.append(fields.get(i).get(JulieXMLConstants.NAME));
+ if (fields.get(i).get(JulieXMLConstants.TYPE).equals("xml"))
+ valuesStrBuilder.append("XMLPARSE(CONTENT ?)");
+ else
+ valuesStrBuilder.append("?");
+ if (i < fields.size() - 1) {
+ columnsStrBuilder.append(",");
+ valuesStrBuilder.append(",");
+ }
+ }
+ return String.format(stmtTemplate, tableName, columnsStrBuilder.toString(), valuesStrBuilder.toString());
+ }
+
+
+ /**
+ * Constructs an SQL prepared statement for updating data rows in the database
+ * table tableName
according to the field schema definition.
+ *
+ * Example:
+ *
+ * If the field schema contains two rows ('pmid' and 'xml') and pmid is primary
+ * key, the resulting String will be
+ *
+ *
UPDATE SET pmid=?, xml=? WHERE pmid=?
+ *
+ * @param tableName Name of the database table to import data into.
+ * @param fieldDefinition A {@link FieldConfig} object determining the rows to be imported.
+ * @return An SQL prepared statement string for import of data into the table.
+ */
+ private String constructUpdateStatementString(String tableName, FieldConfig fieldDefinition) {
+ String stmtTemplate = "UPDATE %s SET %s WHERE %s";
+ List> fields = fieldDefinition.getFields();
+ StringBuilder newValueStrBuilder = new StringBuilder();
+ for (int i = 0; i < fields.size(); ++i) {
+ newValueStrBuilder.append(fields.get(i).get(JulieXMLConstants.NAME));
+ if (fields.get(i).get(JulieXMLConstants.TYPE).equals("xml"))
+ newValueStrBuilder.append("=XMLPARSE(CONTENT ?)");
+ else
+ newValueStrBuilder.append("=?");
+ if (i < fields.size() - 1)
+ newValueStrBuilder.append(",");
+ }
+ String[] primaryKeys = fieldDefinition.getPrimaryKey();
+ StringBuilder conditionStrBuilder = new StringBuilder();
+ for (int i = 0; i < primaryKeys.length; ++i) {
+ String key = primaryKeys[i];
+ conditionStrBuilder.append(key).append("=?");
+ if (i < primaryKeys.length - 1)
+ conditionStrBuilder.append(" AND ");
+ }
+ String statementString = String.format(stmtTemplate, tableName, newValueStrBuilder.toString(),
+ conditionStrBuilder.toString());
+ LOG.trace("PreparedStatement update command: {}", statementString);
+ return statementString;
+ }
+
+ /**
+ * Alters an table, executing the supplied action
+ *
+ * @param action - SQL fragment, specifiying how to alter the table
+ * @param tableName - table to alter
+ */
+ private void alterTable(String action, String tableName) {
+
+ String sqlString = "ALTER TABLE " + tableName + " " + action;
+ try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+ Statement st = conn.createStatement();
+ st.execute(sqlString);
+ } catch (SQLException e) {
+ e.printStackTrace();
+ }
+ }
+
    /**
     * Returns all rows of <code>table</code> with matching IDs and a timestamp
     * newer than <code>timestamp</code>, using the active table schema.
     *
     * @param ids       primary keys of the rows to retrieve
     * @param table     the table to query
     * @param timestamp only rows with a newer timestamp are returned
     * @return the matching rows as an iterator
     * @see #queryWithTime(List, String, String, String)
     */
    public DBCIterator<byte[][]> queryWithTime(List<Object[]> ids, String table, String timestamp) {
        return queryWithTime(ids, table, timestamp, activeTableSchema);
    }
+
    /********************************
     * Data Retrieval
     ****************************************************************************************************/
    /*
     * Speed (tested by repeated queries, using a pool PC and a batch size of 1000):
     * queryAll() fetched 8.5 documents/ms (33 min for the whole DB with 16.9*10e6
     * documents); query(ids) fetched 9.3 documents/ms (9.3 s for 10e5 documents of
     * a PMID sample).
     */
+
    /**
     * Returns an iterator over all rows in the table with matching ID and a
     * timestamp newer (&gt;) than <code>timestamp</code>. The Iterator will use
     * threads, memory and a connection until all matches are returned.
     *
     * @param ids        list with primary keys
     * @param table      table to query
     * @param timestamp  timestamp (only rows with a newer timestamp are returned)
     * @param schemaName the table schema defining the timestamp column
     * @return pmid and xml as an Iterator
     */
    public DBCIterator<byte[][]> queryWithTime(List<Object[]> ids, String table, String timestamp, String schemaName) {
        FieldConfig fieldConfig = fieldConfigs.get(schemaName);
        // NOTE(review): the timestamp value is concatenated verbatim into the SQL
        // condition; callers must pass a trusted, properly quoted value — TODO
        // confirm and consider parameterizing this condition.
        String timestampWhere = fieldConfig.getTimestampFieldName() + " > " + timestamp;
        return new ThreadedColumnsToRetrieveIterator(this, ids, table, timestampWhere, schemaName);
    }
+
    /**
     * Returns an iterator over the given columns of table <code>table</code>.
     * NOTE: The Iterator will use threads, memory and a connection until it is
     * exhausted, i.e. until <code>hasNext()</code> returns <code>false</code>.
     *
     * @param fields the columns to return
     * @param table  the table to query
     * @return the results as an Iterator
     */
    // NOTE(review): the raw DBCIterator return type is retained from the original;
    // the element type produced by ThreadedColumnsIterator is not visible here —
    // TODO confirm.
    public DBCIterator queryAll(List<String> fields, String table) {
        return new ThreadedColumnsIterator(this, fields, table);
    }
+
    /**
     * Returns the requested fields from the requested table. The iterator must be fully consumed or dangling threads
     * and connections will remain, possibly causing the application to wait forever for an open connection.
     *
     * @param table  The table to query.
     * @param fields The names of the columns to retrieve values from.
     * @return An iterator over the requested column values.
     */
    public DBCIterator query(String table, List<String> fields) {
        return new ThreadedColumnsIterator(this, fields, table);
    }
+
    /**
     * Returns the requested fields from the requested table. The iterator must be fully consumed or dangling threads
     * and connections will remain, possibly causing the application to wait forever for an open connection.
     *
     * @param table  The table to query.
     * @param fields The names of the columns to retrieve values from.
     * @param limit  A limit of documents to retrieve.
     * @return An iterator over the requested column values.
     */
    public DBCIterator query(String table, List<String> fields, long limit) {
        return new ThreadedColumnsIterator(this, fields, table, limit);
    }
+
    /**
     * Returns the values of the column {@link #DEFAULT_FIELD} in the given table.
     * The Iterator will use threads, memory and a connection until all matches
     * were returned.
     *
     * @param keys  the primary keys of the rows to retrieve; one array holding the
     *              parts of each (possibly multi-column) key
     * @param table the table to query
     * @return the column values as an iterator
     * @see #query(List, String, String)
     */
    public DBCIterator query(List<Object[]> keys, String table) {
        return new ThreadedColumnsIterator(this, keys, Collections.singletonList(DEFAULT_FIELD), table, activeTableSchema);
    }
+
    /**
     * Returns the values of the column {@link #DEFAULT_FIELD} in the given table.
     * The Iterator will use threads, memory and a connection until all matches
     * were returned.
     *
     * @param keys       list of arrays containing the parts of the primary key
     * @param table      the table to query
     * @param schemaName the table schema name of <code>table</code>
     * @return the results as an Iterator
     */
    public DBCIterator query(List<Object[]> keys, String table, String schemaName) {
        return new ThreadedColumnsIterator(this, keys, Collections.singletonList(DEFAULT_FIELD), table, schemaName);
    }
+
    /**
     * Retrieves row values of <code>table</code> from the database. The returned
     * columns are those that are configured to be retrieved in the active table
     * schema.
     *
     * @param ids   primary keys of the rows to retrieve
     * @param table the table to read from
     * @return the configured columns of the matching rows
     * @see #retrieveColumnsByTableSchema(List, String, String)
     */
    public DBCIterator<byte[][]> retrieveColumnsByTableSchema(List<Object[]> ids, String table) {
        return retrieveColumnsByTableSchema(ids, table, activeTableSchema);
    }
+
    /**
     * Retrieves row values of <code>table</code> from the database. The returned
     * columns are those that are configured to be retrieved in the table schema
     * with name <code>schemaName</code>.
     *
     * @param ids        primary keys of the rows to retrieve
     * @param table      the table to read from
     * @param schemaName the table schema name of <code>table</code>
     * @return the configured columns of the matching rows
     */
    public DBCIterator<byte[][]> retrieveColumnsByTableSchema(List<Object[]> ids, String table, String schemaName) {
        return new ThreadedColumnsToRetrieveIterator(this, ids, table, schemaName);
    }
+
    /**
     * Retrieves data from the database over multiple tables. All tables will be
     * joined on the given IDs. The columns to be retrieved for each table are
     * determined by its table schema. For this purpose, the <code>tables</code>
     * and <code>schemaNames</code> arrays are required to be parallel.
     *
     * @param ids         A list of primary keys identifying the items to retrieve.
     * @param tables      The tables from which the items identified by <code>ids</code> should be retrieved.
     * @param schemaNames A parallel array to <code>tables</code> that specifies the table schema name of each table.
     * @return The joined data from the requested tables.
     */
    public DBCIterator<byte[][]> retrieveColumnsByTableSchema(List<Object[]> ids, String[] tables, String[] schemaNames) {
        return new ThreadedColumnsToRetrieveIterator(this, ids, tables, schemaNames);
    }
+
    /**
     * <p>
     * Returns all column data from the data table <code>tableName</code> which is
     * marked as 'to be retrieved' in the active table scheme.
     * </p>
     * <p>
     * For more specific information, please refer to
     * {@link #queryDataTable(String, String, String)}.
     * </p>
     *
     * @param tableName      Name of a data table.
     * @param whereCondition Optional additional specifications for the SQL "SELECT" statement; may be null.
     * @return The retrieved columns of all matching rows.
     * @see #queryDataTable(String, String, String)
     */
    public DBCIterator<byte[][]> queryDataTable(String tableName, String whereCondition) {
        return queryDataTable(tableName, whereCondition, activeTableSchema);
    }
+
    /**
     * <p>
     * Returns all column data from the data table <code>tableName</code> which is
     * marked as 'to be retrieved' in the table scheme specified by
     * <code>schemaName</code>.
     * </p>
     * <p>
     * This method offers direct access to the table data by using an SQL
     * <code>ResultSet</code> in cursor mode, allowing for queries leading to large
     * results.
     * </p>
     * <p>
     * An optional where clause (actually everything behind the "FROM" in the SQL
     * select statement) may be passed to restrict the rows being returned. All
     * specifications are allowed which do not alter the number of columns returned
     * (like "GROUP BY").
     * </p>
     *
     * @param tableName      Name of a data table.
     * @param whereCondition Optional additional specifications for the SQL "SELECT" statement; may be null.
     * @param schemaName     The table schema name determining which columns should be retrieved.
     * @return An iterator over <code>byte[][]</code>; each returned array contains
     *         one nested byte array per retrieved column, holding the column's data
     *         as a sequence of bytes. Returns <code>null</code> if the query fails.
     */
    public DBCIterator<byte[][]> queryDataTable(String tableName, String whereCondition, String schemaName) {
        if (!withConnectionQueryBoolean(c -> c.tableExists(tableName)))
            throw new IllegalArgumentException("Table \"" + tableName + "\" does not exist.");

        final FieldConfig fieldConfig = fieldConfigs.get(schemaName);

        // Build the correct query.
        String query = null;
        String selectedColumns = StringUtils.join(fieldConfig.getColumnsToRetrieve(), ",");
        // prepend the WHERE keyword if not already present and if we don't
        // actually have only a LIMIT constraint
        if (whereCondition != null && !whereCondition.trim().toUpperCase().startsWith("WHERE")
                && !whereCondition.trim().toUpperCase().matches("LIMIT +[0-9]+"))
            query = String.format("SELECT %s FROM %s WHERE %s", selectedColumns, tableName, whereCondition);
        else if (whereCondition != null)
            query = String.format("SELECT %s FROM %s %s", selectedColumns, tableName, whereCondition);
        else
            query = String.format("SELECT %s FROM %s", selectedColumns, tableName);
        final String finalQuery = query;

        try {

            DBCIterator<byte[][]> it = new DBCIterator<byte[][]>() {

                // The connection is held until the iterator is exhausted or closed.
                private CoStoSysConnection conn = reserveConnection();
                private ResultSet rs = doQuery(conn);
                private boolean hasNext = rs.next();

                private ResultSet doQuery(CoStoSysConnection conn) throws SQLException {
                    // Get a statement which is set to cursor mode. The data
                    // table could be really large and we don't have the two-fold
                    // process here where first we get IDs from a subset and then
                    // only the actual documents for these IDs.
                    conn.setAutoCommit(false);
                    Statement stmt = conn.createStatement();
                    stmt.setFetchSize(queryBatchSize);
                    return stmt.executeQuery(finalQuery);
                }

                @Override
                public boolean hasNext() {
                    // Release the connection as soon as the result set is exhausted.
                    if (!hasNext)
                        close();
                    return hasNext;
                }

                @Override
                public byte[][] next() {
                    if (hasNext) {
                        List<Map<String, String>> fields = fieldConfig.getFields();
                        try {
                            byte[][] retrievedData = new byte[fieldConfig.getColumnsToRetrieve().length][];
                            for (int i = 0; i < retrievedData.length; i++) {
                                retrievedData[i] = rs.getBytes(i + 1);
                                // Gzipped columns are decompressed transparently.
                                if (Boolean.parseBoolean(fields.get(i).get(JulieXMLConstants.GZIP)))
                                    retrievedData[i] = JulieXMLTools.unGzipData(retrievedData[i]);
                            }
                            hasNext = rs.next();
                            if (!hasNext)
                                close();
                            return retrievedData;
                        } catch (SQLException | IOException e) {
                            // NOTE(review): the exception is swallowed and null is
                            // returned; callers cannot distinguish errors from
                            // exhaustion — TODO consider propagating.
                            hasNext = false;
                            e.printStackTrace();
                        }
                    }
                    return null;
                }

                @Override
                public void remove() {
                    throw new UnsupportedOperationException();
                }

                @Override
                public void close() {
                    conn.close();
                }
            };

            return it;
        } catch (SQLException e) {
            // NOTE(review): the error is logged and null is returned instead of
            // being propagated — TODO confirm callers handle a null iterator.
            LOG.error("Error while executing SQL statement \"" + finalQuery + "\"");
            e.printStackTrace();
        }

        return null;
    }
+
    /**
     * Returns up to <code>limitParam</code> documents from the subset or data table
     * <code>tableName</code>, using the active table schema.
     *
     * @param tableName  a subset or data table to read documents from
     * @param limitParam the maximum number of documents to return; all documents
     *                   are returned for non-positive values
     * @return the retrieved documents
     * @throws SQLException if querying the table fails
     * @see #querySubset(String, String, long, Integer, String)
     */
    public DBCIterator<byte[][]> querySubset(String tableName, long limitParam) throws SQLException {
        return querySubset(tableName, null, limitParam, 0, activeTableSchema);
    }
+
    /**
     * Returns the number of rows fetched per batch when querying data.
     */
    public int getQueryBatchSize() {
        return queryBatchSize;
    }
+
    /**
     * Sets the number of rows fetched per batch when querying data.
     *
     * @param queryBatchSize the new query batch size
     */
    public void setQueryBatchSize(int queryBatchSize) {
        this.queryBatchSize = queryBatchSize;
    }
+
    /**
     * <p>
     * Retrieves XML field values in the data table referenced by the subset table
     * <code>tableName</code>, or from <code>tableName</code> itself if it is a
     * data table.
     * </p>
     * <p>
     * The method always first retrieves a batch of primary keys from the subset
     * table and then gets the actual documents from the data table (necessary for
     * the data table / subset paradigm). As this is unnecessary when querying
     * directly from a data table, for such queries this method delegates to
     * {@link #queryDataTable(String, String, String)}.
     * </p>
     * <p>
     * The number of returned documents is restricted by <code>limitParam</code>;
     * all documents are returned if <code>limitParam</code> is non-positive.
     * Note: <code>whereClause</code> could of course already contain an SQL
     * 'LIMIT' specification. However, it won't work as expected since that limit
     * would be applied to each batch of subset IDs which is used to query the data
     * table. Using the <code>limitParam</code> parameter assures you get at most
     * as many documents from the iterator as specified. If <code>tableName</code>
     * denotes a data table and <code>whereClause</code> does not already contain a
     * 'LIMIT' expression, <code>limitParam</code> will be added to
     * <code>whereClause</code> for the subsequent call to
     * <code>queryDataTable</code>.
     * </p>
     *
     * @param tableName     Subset table determining which documents to retrieve from
     *                      the data table; may also be a data table itself.
     * @param whereClause   An SQL where clause restricting the returned columns of
     *                      each queried subset-ID batch. This clause must not change
     *                      the rows returned (e.g. by 'GROUP BY').
     * @param limitParam    Number restriction of documents to return.
     * @param numberRefHops The number of subset-reference hops to follow to reach the data table.
     * @param schemaName    The name of the table schema of the referenced data table.
     * @return An iterator returning documents referenced from or contained in
     *         <code>tableName</code>; <code>null</code> if the query fails.
     * @throws SQLException if the subset key query fails
     * @see #queryDataTable(String, String, String)
     */
    public DBCIterator<byte[][]> querySubset(final String tableName, final String whereClause, final long limitParam,
                                             Integer numberRefHops, final String schemaName) throws SQLException {
        if (!withConnectionQueryBoolean(c -> c.tableExists(tableName)))
            throw new IllegalArgumentException("Table \"" + tableName + "\" does not exist.");

        final FieldConfig fieldConfig = fieldConfigs.get(schemaName);
        // Resolve the (possibly transitive) data table referenced by tableName; a
        // data table resolves to itself.
        final String dataTable = withConnectionQueryString(c -> c.getReferencedTable(tableName, numberRefHops));
        if (dataTable.equals(tableName)) {
            String newWhereClause = whereClause;
            if (newWhereClause == null && limitParam > 0)
                newWhereClause = "";
            // For the current method, limit must be given explicitly. Not so
            // for querying a single table like the data table. If the
            // whereClause not already contains a LIMIT expression, we just add
            // it corresponding to the limit parameter.
            if (limitParam > 0 && !newWhereClause.toLowerCase().matches(".*limit +[0-9]+.*"))
                newWhereClause += " LIMIT " + limitParam;
            return queryDataTable(tableName, newWhereClause, schemaName);
        }


        try (final CoStoSysConnection conn = obtainOrReserveConnection()) {
            // We will set the key-retrieval statement below to cursor mode by
            // specifying a maximum number of rows to return; for this to work,
            // auto commit must be turned off.
            conn.setAutoCommit(false);
            final Statement stmt = conn.createStatement();
            // Go to cursor mode by setting a fetch size.
            stmt.setFetchSize(queryBatchSize);
            // As we want to query the whole subset/data table, just get a
            // cursor over all IDs in the set.
            // NOTE(review): with a multi-column primary key this SELECT returns a
            // single record column while getObject(i + 1) below reads per column —
            // TODO confirm behavior for composite keys.
            String sql = "SELECT (" + fieldConfig.getPrimaryKeyString() + ") FROM " + tableName;
            final ResultSet outerKeyRS = stmt
                    .executeQuery(sql);
            final DataBaseConnector dbc = this;

            // We need to keep the connection open until the iterator has finished. It will close the connection
            // when all items have been returned, effectively decreasing the usage level of the CoStoSysConnection.
            conn.incrementUsageNumber();
            DBCIterator<byte[][]> it = new DBCIterator<byte[][]>() {

                private long returnedDocs = 0;
                private ResultSet keyRS = outerKeyRS;
                // Non-positive limitParam means "no limit".
                private long limit = limitParam <= 0 ? Long.MAX_VALUE : limitParam;
                private Iterator<byte[][]> xmlIt;

                @Override
                public boolean hasNext() {
                    if (returnedDocs >= limit)
                        return false;

                    try {
                        // Refill: fetch the next batch of keys and create a new
                        // document iterator for them.
                        if (xmlIt == null || !xmlIt.hasNext()) {
                            int currentBatchSize = 0;
                            List<Object[]> ids = new ArrayList<>();
                            String[] pks = fieldConfig.getPrimaryKey();
                            while (currentBatchSize < queryBatchSize && keyRS.next()) {
                                String[] values = new String[pks.length];
                                for (int i = 0; i < pks.length; i++) {
                                    values[i] = (String) keyRS.getObject(i + 1);
                                }
                                ids.add(values);
                                ++currentBatchSize;
                            }
                            if (whereClause != null)
                                xmlIt = new ThreadedColumnsToRetrieveIterator(dbc, conn, ids, dataTable, whereClause, schemaName);
                            else
                                xmlIt = new ThreadedColumnsToRetrieveIterator(dbc, conn, ids, dataTable, schemaName);

                            boolean xmlItHasNext = xmlIt.hasNext();
                            if (!xmlItHasNext)
                                close();

                            return xmlItHasNext;
                        }
                    } catch (SQLException e) {
                        // NOTE(review): the exception is swallowed and iteration
                        // continues as if more data existed — TODO consider
                        // propagating.
                        e.printStackTrace();
                    }
                    return true;
                }

                @Override
                public byte[][] next() {
                    if (!hasNext()) {
                        close();
                        return null;
                    }
                    ++returnedDocs;
                    return xmlIt.next();
                }

                @Override
                public void remove() {
                    throw new UnsupportedOperationException();
                }

                @Override
                public void close() {
                    conn.close();
                }

            };

            return it;
        } catch (SQLException e) {
            // NOTE(review): logged-and-null instead of propagation — TODO confirm
            // callers handle a null iterator.
            e.printStackTrace();
        }
        return null;
    }
+
    /**
     * Helper method to determine the columns that are returned in case of a joining operation.
     * Returns the number of returned fields and the according field definitions. If
     * {@code joined} is set to {@code false}, only the first table and the first schema are
     * taken into account.
     *
     * @param joined      Whether the data is joined.
     * @param schemaNames The names of the table schemas of the tables that are read. From the respective table schemas,
     *                    the columns that are marked to be retrieved, are extracted.
     * @return A pair holding the number of retrieved columns and those columns themselves.
     */
+ public Pair>> getNumColumnsAndFields(boolean joined, String[] schemaNames) {
+ int numColumns = 0;
+ List> fields = new ArrayList<>();
+ if (!joined) {
+ FieldConfig fieldConfig = fieldConfigs.get(schemaNames[0]);
+ numColumns = fieldConfig.getColumnsToRetrieve().length;
+ fields = fieldConfig.getFields();
+ } else {
+ for (int i = 0; i < schemaNames.length; i++) {
+ FieldConfig fieldConfig = fieldConfigs.get(schemaNames[i]);
+ int num = fieldConfig.getColumnsToRetrieve().length;
+ numColumns = numColumns + num;
+ List> fieldsPartly = fieldConfig.getFieldsToRetrieve();
+ fields.addAll(fieldsPartly);
+ }
+ }
+ return new ImmutablePair<>(numColumns, fields);
+ }
+
+ /**
+ * Returns the row count of the requested table.
+ *
+ * @param tableName The table to count the rows of.
+ * @return The table row count.
+ */
+ public long getNumRows(String tableName) {
+
+ try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+ String sql = String.format("SELECT sum(1) as %s FROM %s", Constants.TOTAL, tableName);
+ ResultSet resultSet = conn.createStatement().executeQuery(sql);
+ if (resultSet.next()) {
+ return resultSet.getLong(Constants.TOTAL);
+ }
+ } catch (SQLException e) {
+ LOG.error("Error when trying to determine size of table {}: {}", tableName, e);
+ }
+ return 0;
+ }
+
    /**
     * Returns a map with information about how many rows are marked as
     * {@code is_in_process} or {@code is_processed} and how many rows there are in total.
     * The respective values are stored under the keys
     * {@link Constants#IN_PROCESS}, {@link Constants#PROCESSED} and
     * {@link Constants#TOTAL}.
     *
     * @param subsetTableName name of the subset table to gain status information for
     * @return A SubsetStatus instance containing status information about the
     *         subset table {@code subsetTableName}
     * @throws TableNotFoundException If {@code subsetTableName} does not point to a database table.
     */
+ public SubsetStatus status(String subsetTableName, Set statusElementsToReturn) throws TableNotFoundException {
+ if (!tableExists(subsetTableName))
+ throw new TableNotFoundException("The subset table \"" + subsetTableName + "\" does not exist.");
+
+ SubsetStatus status = new SubsetStatus();
+
+ try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+ StringJoiner joiner = new StringJoiner(",");
+ String sumFmtString = "sum(case when %s=TRUE then 1 end) as %s";
+ if (statusElementsToReturn.contains(StatusElement.HAS_ERRORS))
+ joiner.add(String.format(sumFmtString, Constants.HAS_ERRORS, Constants.HAS_ERRORS));
+ if (statusElementsToReturn.contains(StatusElement.IS_PROCESSED))
+ joiner.add(String.format(sumFmtString, Constants.IS_PROCESSED, Constants.IS_PROCESSED));
+ if (statusElementsToReturn.contains(StatusElement.IN_PROCESS))
+ joiner.add(String.format(sumFmtString, Constants.IN_PROCESS, Constants.IN_PROCESS));
+ if (statusElementsToReturn.contains(StatusElement.TOTAL))
+ joiner.add(String.format("sum(1) as %s", Constants.TOTAL));
+ String sql = String.format(
+ "SELECT " + joiner.toString() + " FROM %s", subsetTableName);
+ Statement stmt = conn.createStatement();
+ {
+ ResultSet res = stmt.executeQuery(sql);
+ if (res.next()) {
+ if (statusElementsToReturn.contains(StatusElement.HAS_ERRORS))
+ status.hasErrors = res.getLong(Constants.HAS_ERRORS);
+ if (statusElementsToReturn.contains(StatusElement.IN_PROCESS))
+ status.inProcess = res.getLong(Constants.IN_PROCESS);
+ if (statusElementsToReturn.contains(StatusElement.IS_PROCESSED))
+ status.isProcessed = res.getLong(Constants.IS_PROCESSED);
+ if (statusElementsToReturn.contains(StatusElement.TOTAL))
+ status.total = res.getLong(Constants.TOTAL);
+ }
+ }
+
+ if (statusElementsToReturn.contains(StatusElement.LAST_COMPONENT)) {
+ SortedMap pipelineStates = new TreeMap<>();
+ status.pipelineStates = pipelineStates;
+ String pipelineStateSql = String.format("SELECT %s,count(%s) from %s group by %s",
+ Constants.LAST_COMPONENT, Constants.LAST_COMPONENT, subsetTableName, Constants.LAST_COMPONENT);
+ ResultSet res = stmt.executeQuery(pipelineStateSql);
+ while (res.next())
+ pipelineStates.put(res.getString(1) != null ? res.getString(1) : "", res.getLong(2));
+ }
+ } catch (SQLException e) {
+ e.printStackTrace();
+ }
+
+ return status;
+ }
+
+ /**
+ * @return - all tables in the active scheme
+ */
+ public List getTables() {
+ ArrayList tables = new ArrayList();
+ try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+ ResultSet res = conn.getMetaData().getTables(null, dbConfig.getActivePGSchema(), null,
+ new String[]{"TABLE"});
+ while (res.next())
+ tables.add(res.getString("TABLE_NAME"));
+ } catch (SQLException e) {
+ e.printStackTrace();
+ }
+ return tables;
+ }
+
+ /**
+ * Query the MetaData for the columns of a table
+ *
+ * @param tableName - the table
+ * @return - List of String containing name and type of each column
+ */
+ public List getTableDefinition(String tableName) {
+ try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+ ArrayList columns = new ArrayList();
+ String schema;
+ if (tableName.contains(".")) {
+ schema = tableName.split("\\.")[0];
+ tableName = tableName.split("\\.")[1];
+ } else
+ schema = dbConfig.getActivePGSchema();
+ try {
+ ResultSet res = conn.getMetaData().getColumns(null, schema, tableName, null);
+ // ERIK 6th of December 2013: Removed the type information because
+ // it lead to false positives: When the
+ // dbcConfiguration specifies an "integer", it actually becomes an
+ // "int4". This could be treated, for the
+ // moment
+ // only the names will be checked.
+ while (res.next())
+ // columns.add(res.getString("COLUMN_NAME") + " " +
+ // res.getString("TYPE_NAME"));
+ columns.add(res.getString("COLUMN_NAME"));
+ } catch (SQLException e) {
+ e.printStackTrace();
+ }
+ return columns;
+ }
+ }
+
+ /**
+ * @return - the active Postgres scheme
+ */
+ public String getScheme() {
+ String scheme = "none";
+ try (CoStoSysConnection conn = obtainOrReserveConnection()){
+ ResultSet res = conn.createStatement().executeQuery("SHOW search_path;");
+ if (res.next())
+ scheme = res.getString(1);
+ } catch (SQLException e) {
+ e.printStackTrace();
+ }
+ return scheme;
+ }
+
+ /*******************************
+ * Classes for query()
+ *******************************************/
+
+ /**
+ * @return the active field configuration
+ */
+ public FieldConfig getFieldConfiguration() {
+ return fieldConfigs.get(activeTableSchema);
+ }
+
+ public void addFieldConfiguration(FieldConfig config) {
+ fieldConfigs.put(config.getName(), config);
+ }
+
    /**
     * @param schemaName The name of the schema for which the eventual
     *                   {@link FieldConfig} should be returned.
     * @return The field configuration for {@code schemaName}.
     */
+ public FieldConfig getFieldConfiguration(String schemaName) {
+ return fieldConfigs.get(schemaName);
+ }
+
+ /**
+ * Checks whether the given table matches the active table schema.
+ *
+ * @param tableName The table to check.
+ * @see #checkTableDefinition(String, String)
+ */
+ public void checkTableDefinition(String tableName) throws TableSchemaMismatchException, TableNotFoundException {
+ checkTableDefinition(tableName, activeTableSchema);
+ }
+
    /**
     * Compares the actual table in the database with its definition in the XML
     * configuration.
     * Note: This method currently does not check other than primary key columns for
     * tables that reference another table, even if those should actually be data
     * tables.
     * <p>
     * This method makes use of the {@link #obtainOrReserveConnection()} method to obtain a connection in case
     * the current thread has not already obtained one.
     * </p>
     *
     * @param tableName - table to check
     */
+ public void checkTableDefinition(String tableName, String schemaName) throws TableSchemaMismatchException, TableNotFoundException {
+ try (CoStoSysConnection connection = obtainOrReserveConnection()) {
+ if (!tableExists(tableName))
+ throw new TableNotFoundException("The table '" + tableName + "' does not exist.");
+ FieldConfig fieldConfig = fieldConfigs.get(schemaName);
+
+ List actualColumns = new ArrayList<>();
+ List definedColumns = new ArrayList<>();
+
+ // Postgres will convert table names to lower case but check for capital
+ // letter names all the same, thus never
+ // finding a match when giving names with capital letters.
+ tableName = tableName.toLowerCase();
+
+ // ERIK 6th of December 2013: Removed the type information because it
+ // lead to false positives: When the
+ // dbcConfiguration specifies an "integer", it actually becomes an
+ // "int4". This could be treated, for the moment
+ // only the names will be checked.
+ String tableType;
+ if (getReferencedTable(tableName) == null) { // dataTable, check all
+ tableType = "data";
+ // columns
+ actualColumns = new ArrayList<>(getTableDefinition(tableName));
+ for (Map m : fieldConfig.getFields())
+ // definedColumns.add(m.get("name") + " " + m.get("type"));
+ definedColumns.add(m.get(JulieXMLConstants.NAME));
+
+ } else { // subset table, check only pk-columns
+ tableType = "subset";
+ for (Map m : fieldConfig.getFields())
+ if (new Boolean(m.get(JulieXMLConstants.PRIMARY_KEY)))
+ // definedColumns.add(m.get("name") + " " + m.get("type"));
+ definedColumns.add(m.get("name"));
+
+ // getting pk-names and types
+ String schema;
+ if (tableName.contains(".")) {
+ schema = tableName.split("\\.")[0];
+ tableName = tableName.split("\\.")[1];
+ } else
+ schema = dbConfig.getActivePGSchema();
+
+ HashSet pkNames = new HashSet();
+
+ Connection conn = connection.getConnection();
+ try {
+ ResultSet res = conn.getMetaData().getImportedKeys("", schema, tableName);
+ while (res.next())
+ pkNames.add(res.getString("FKCOLUMN_NAME"));
+ res = conn.getMetaData().getColumns(null, schema, tableName, null);
+ while (res.next()) {
+ if (pkNames.contains(res.getString("COLUMN_NAME")))
+ // actualColumns.add(res.getString("COLUMN_NAME") + " "
+ // + res.getString("TYPE_NAME"));
+ actualColumns.add(res.getString("COLUMN_NAME"));
+ }
+ } catch (SQLException e) {
+ e.printStackTrace();
+ }
+ }
+ Collections.sort(definedColumns);
+ Collections.sort(actualColumns);
+ if (!definedColumns.equals(actualColumns)) {
+
+ String columnType = tableType.equals("subset") ? "primary key " : "";
+ throw new TableSchemaMismatchException("The existing " + tableType + " table \"" + tableName + "\" has the following " +
+ columnType +
+ "columns: \"" + StringUtils.join(actualColumns, " ") + "\". However, the CoStoSys table " +
+ "schema \"" + schemaName + "\" that is used to operate on that table specifies a different set of " + columnType + "columns:" +
+ StringUtils.join(definedColumns, " ") + ". The active table schema is specified in the CoStoSys XML coniguration file.");
+ }
+ }
+ }
+
    /**
     * <p>
     * Sets the values of {@code is_processed} to {@code TRUE} and of
     * {@code is_in_process} to {@code FALSE} for a collection of
     * documents according to the given primary keys.
     * </p>
     *
     * @param subsetTableName name of the subset
     * @param primaryKeyList  the list of primary keys which itself can consist of several
     *                        primary key elements
     */
+ public void setProcessed(String subsetTableName, ArrayList primaryKeyList) {
+
+ FieldConfig fieldConfig = fieldConfigs.get(activeTableSchema);
+
+ String whereArgument = StringUtils.join(fieldConfig.expandPKNames("%s = ?"), " AND ");
+ String update = "UPDATE " + subsetTableName + " SET is_processed = TRUE, is_in_process = FALSE" + " WHERE "
+ + whereArgument;
+
+ try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+ conn.setAutoCommit(false);
+
+ PreparedStatement processed = conn.prepareStatement(update);
+ for (byte[][] primaryKey : primaryKeyList) {
+ for (int i = 0; i < primaryKey.length; i++) {
+ processed.setString(i + 1, new String(primaryKey[i]));
+ }
+ processed.addBatch();
+ }
+ processed.executeBatch();
+ conn.commit();
+
+ } catch (SQLException e) {
+ e.printStackTrace();
+ }
+ }
+
    /**
     * <p>
     * Sets the value of {@code has_errors} to {@code TRUE} and adds a
     * description in {@code log} for exceptions which occurred during the
     * processing of a collection of documents according to the given primary keys.
     * </p>
     *
     * @param subsetTableName name of the subset
     * @param primaryKeyList  the list of primary keys which itself can consist of several
     *                        primary key elements
     * @param logException    matches primary keys of unsuccessfully processed documents and
     *                        exceptions that occurred during the processing
     */
+ public void setException(String subsetTableName, ArrayList primaryKeyList,
+ HashMap logException) {
+
+
+ FieldConfig fieldConfig = fieldConfigs.get(activeTableSchema);
+
+ String whereArgument = StringUtils.join(fieldConfig.expandPKNames("%s = ?"), " AND ");
+ String update = "UPDATE " + subsetTableName + " SET has_errors = TRUE, log = ?" + " WHERE " + whereArgument;
+
+ try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+ conn.setAutoCommit(false);
+
+ PreparedStatement processed = conn.prepareStatement(update);
+ for (byte[][] primaryKey : primaryKeyList) {
+ for (int i = 0; i < primaryKey.length; i++) {
+ processed.setString(1, logException.get(primaryKey));
+ processed.setString(i + 2, new String(primaryKey[i]));
+ }
+ processed.addBatch();
+ }
+ processed.executeBatch();
+ conn.commit();
+
+ } catch (SQLException e) {
+ e.printStackTrace();
+ }
+ }
+
+ /**
+ * Returns the indices of the primary keys, beginning with 0.
+ */
+ public List getPrimaryKeyIndices() {
+ FieldConfig fieldConfig = fieldConfigs.get(activeTableSchema);
+ List pkIndices = fieldConfig.getPrimaryKeyFieldNumbers();
+ return pkIndices;
+ }
+
+ public void checkTableSchemaCompatibility(String referenceSchema, String[] schemaNames) throws TableSchemaMismatchException {
+ String[] schemas = new String[schemaNames.length + 1];
+ schemas[0] = referenceSchema;
+ System.arraycopy(schemaNames, 0, schemas, 1, schemaNames.length);
+ checkTableSchemaCompatibility(schemas);
+ }
+
+ public void checkTableSchemaCompatibility(String... schemaNames) throws TableSchemaMismatchException {
+ if (null == schemaNames || schemaNames.length == 0) {
+ LOG.warn("No table schema names were passed - nothing to check.");
+ return;
+ }
+ List referenceKey = null;
+ String referenceSchemaName = null;
+ List notMatchingSchemaNames = new ArrayList<>();
+ for (String schemaName : schemaNames) {
+ FieldConfig fieldConfig = fieldConfigs.get(schemaName);
+ String[] primaryKey = fieldConfig.getPrimaryKey();
+ List asList = Arrays.asList(primaryKey);
+ Collections.sort(asList);
+ if (null == referenceKey) {
+ referenceKey = asList;
+ referenceSchemaName = schemaName;
+ } else {
+ if (!referenceKey.equals(asList))
+ notMatchingSchemaNames.add(schemaName);
+ }
+ }
+ if (!notMatchingSchemaNames.isEmpty())
+ throw new TableSchemaMismatchException(
+ "Found incompatibility of table schema definitions with schemas " + StringUtils.join(schemaNames, ", ") + ": There were at least one table schema pair that is not compatible to each other because their primary keys differ. The table schema \""
+ + referenceSchemaName + "\" has the primary key \"" + fieldConfigs.get(referenceSchemaName).getPrimaryKeyString() + "\" which differs from the table schema(s) \""
+ + StringUtils.join(notMatchingSchemaNames, ", ") + "\".");
+ }
+
+ public String getDbURL() {
+ return dbURL;
+ }
+
+ public void setDbURL(String uri) {
+ dbURL = uri;
+ }
+
+ public void close() {
+ releaseConnections();
+ LOG.debug("Shutting down DataBaseConnector.");
+ if (dataSource instanceof HikariDataSource) {
+ LOG.debug("Checking if the datasource is still in use (perhaps by other threads or other DBC instances)");
+ final int activeConnections = dataSource.getHikariPoolMXBean().getActiveConnections();
+ final int awaitingConnection = dataSource.getHikariPoolMXBean().getThreadsAwaitingConnection();
+ if (activeConnections > 0) {
+ LOG.debug("Data source is still in use ({} connections active), not closing it. Another DBC instance should exist that will attempt closing the data source at a later time point.", activeConnections);
+ } else if (awaitingConnection > 0) {
+ LOG.debug("There are no active connections right now but {} threads await a connection. Letting the data source open. Another DBC instance should close it later.", awaitingConnection);
+ } else {
+ LOG.debug("Data source does not have active connections, closing it.");
+ dataSource.close();
+ }
+ }
+ }
+
+ public boolean isDatabaseReachable() {
+ try (CoStoSysConnection ignored = obtainOrReserveConnection()) {
+ return true;
+ } catch (Exception e) {
+ LOG.warn("Got error when trying to connect to {}: {}", getDbURL(), e.getMessage());
+ }
+ return false;
+ }
+
+ /**
+ * Adds an auto-generated field configuration that exhibits the given primary key and all the fields required to
+ * store complete XMI document data (i.e. not segmented XMI parts but the whole serialized CAS) in a database table.
+ * The field configuration will have the given primary key and an additional field named 'xmi'.
+ * This method is used by the Jena Document Information
+ * System (JeDIS) components jcore-xmi-db-reader and jcore-xmi-db-consumer.
+ *
+ * @param primaryKey The document primary key for which a document CAS XMI table schema should be created.
+ * @param doGzip Whether the XMI data should be gzipped in the table.
+ * @return The created field configuration.
+ */
+ public synchronized FieldConfig addXmiDocumentFieldConfiguration(List> primaryKey, boolean doGzip) {
+ String referenceSchema = doGzip ? "xmi_complete_cas_gzip" : "xmi_complete_cas";
+ return addPKAdaptedFieldConfiguration(primaryKey, referenceSchema, "-complete-cas-xmi-autogenerated");
+ }
+
+ public synchronized FieldConfig addPKAdaptedFieldConfiguration(List> primaryKey, String fieldConfigurationForAdaption, String fieldConfigurationNameSuffix) {
+ List pkNames = primaryKey.stream().map(map -> map.get(JulieXMLConstants.NAME)).collect(Collectors.toList());
+ String fieldConfigName = StringUtils.join(pkNames, "-") + fieldConfigurationNameSuffix;
+ FieldConfig ret;
+ if (!fieldConfigs.containsKey(fieldConfigName)) {
+ List> fields = new ArrayList<>(primaryKey);
+ FieldConfig xmiConfig = fieldConfigs.get(fieldConfigurationForAdaption);
+ HashSet xmiConfigPkIndices = new HashSet<>(xmiConfig.getPrimaryKeyFieldNumbers());
+ // Add those fields to the new configuration that are not the primary key fields
+ IntStream.range(0, xmiConfig.getFields().size()).
+ filter(i -> !xmiConfigPkIndices.contains(i)).
+ mapToObj(i -> xmiConfig.getFields().get(i)).
+ forEach(fields::add);
+ ret = new FieldConfig(fields, "", fieldConfigName);
+ fieldConfigs.put(ret.getName(), ret);
+ } else {
+ ret = fieldConfigs.get(fieldConfigs.get(fieldConfigName));
+ }
+ return ret;
+ }
+
    /**
     * Adds an auto-generated field configuration that exhibits the given primary key and all the fields required to
     * store XMI base document data (i.e. the document text but not its annotations) in a database table. The additional fields are
     * <ul>
     * <li>xmi</li>
     * <li>max_xmi_id</li>
     * <li>sofa_mapping</li>
     * </ul>
     * and are required for the storage of XMI annotation graph segments stored in other tables. The schema created with
     * this method is to be used for the base documents that include the document text. To get a schema with a specific
     * primary key that stores annotation data, see {@link #addXmiAnnotationFieldConfiguration(List, boolean)}.
     * This method is used by the Jena Document Information
     * System (JeDIS) components jcore-xmi-db-reader and jcore-xmi-db-consumer.
     *
     * @param primaryKey The document primary key for which a base document XMI segmentation table schema should be created.
     * @param doGzip     Whether the XMI data should be gzipped in the table.
     * @return The created field configuration.
     */
+ public synchronized FieldConfig addXmiTextFieldConfiguration(List> primaryKey, boolean doGzip) {
+ String referenceSchema = doGzip ? "xmi_text_gzip" : "xmi_text";
+ return addPKAdaptedFieldConfiguration(primaryKey, referenceSchema, "-xmi-text-autogenerated");
+ }
+
    /**
     * Adds an auto-generated field configuration that exhibits the given primary key and all the fields required to
     * store XMI annotation data (not base documents) in database tables. The only field besides the primary key is
     * {@code xmi} and will store the actual XMI annotation data. This table schema
     * is used for the storage of XMI annotation graph segments. Those segments will then correspond to
     * UIMA annotation types that are stored in tables of their own. A table schema to store the base document
     * is created by {@link #addXmiTextFieldConfiguration(List, boolean)}.
     * This method is used by the Jena Document Information
     * System (JeDIS) components jcore-xmi-db-reader and jcore-xmi-db-consumer.
     *
     * @param primaryKey The document primary key for which an annotation XMI segmentation table schema should be created.
     * @param doGzip     Whether the XMI data should be gzipped in the table.
     * @return The created field configuration.
     */
+ public synchronized FieldConfig addXmiAnnotationFieldConfiguration(List> primaryKey, boolean doGzip) {
+ List pkNames = primaryKey.stream().map(map -> map.get(JulieXMLConstants.NAME)).collect(Collectors.toList());
+ String fieldConfigName = StringUtils.join(pkNames, "-") + "-xmi-annotations-autogenerated";
+ FieldConfig ret;
+ if (!fieldConfigs.containsKey(fieldConfigName)) {
+ List> fields = new ArrayList<>();
+ // Important: For the annotation tables we don't want to return their primary key. They are used
+ // as AdditionalTable parameter to the XmiDBReader and the primary key is already returned from the
+ // data table schema.
+ // We make a copy of the primary key fields so we can change them without manipulating the given key.
+ primaryKey.stream().map(HashMap::new).forEach(fields::add);
+ fields.forEach(pkField -> pkField.put(JulieXMLConstants.RETRIEVE, "false"));
+ FieldConfig xmiConfig = fieldConfigs.get(doGzip ? "xmi_annotation_gzip" : "xmi_annotation");
+ HashSet xmiConfigPkIndices = new HashSet<>(xmiConfig.getPrimaryKeyFieldNumbers());
+ // Add those fields to the new configuration that are not the primary key fields
+ IntStream.range(0, xmiConfig.getFields().size()).
+ filter(i -> !xmiConfigPkIndices.contains(i)).
+ mapToObj(i -> xmiConfig.getFields().get(i)).
+ forEach(fields::add);
+ ret = new FieldConfig(fields, "", fieldConfigName);
+ fieldConfigs.put(ret.getName(), ret);
+ } else {
+ ret = fieldConfigs.get(fieldConfigs.get(fieldConfigName));
+ }
+ return ret;
+ }
+
+ public void resetSubset(String subsetTableName, List pkValues) {
+ try (CoStoSysConnection conn = obtainOrReserveConnection()) {
+ resetSubset(conn, subsetTableName, pkValues);
+ }
+
+ }
+
+ /**
+ * Returns the connection associated with the current thread object. To release used connections back to the connection pool, call {@link #releaseConnections()}.
+ *
+ * @return A connection associated with the current thread.
+ * @throws IllegalStateException If there are no reserved connections for the current thread.
+ * @see #obtainOrReserveConnection()
+ * @see #releaseConnections()
+ * @see #reserveConnection()
+ */
+ public CoStoSysConnection obtainConnection() {
+ Thread currentThread = Thread.currentThread();
+ LOG.trace("Trying to obtain previously reserved connection for thread {}", currentThread.getName());
+ List list;
+ try {
+ list = connectionCache.get(currentThread);
+ } catch (ExecutionException e) {
+ throw new RuntimeException(e);
+ }
+ cleanClosedReservedConnections(list, currentThread);
+ if (list.isEmpty())
+ throw new NoReservedConnectionException("There are no reserved connections for the current thread with name \"" + currentThread.getName() + "\". You need to call reserveConnection() before obtaining one.");
+ // Return the newest connection. The idea is to stick "closer" to the time the connection was reserved so that
+ // a method can be sure that it reserves a connection for its subcalls.
+ final CoStoSysConnection conn = list.get(list.size() - 1);
+ LOG.trace("Obtaining already reserved connection {} for thread {}", conn.getConnection(), currentThread.getName());
+ conn.incrementUsageNumber();
+ return conn;
+ }
+
    /**
     * <p>
     * This is the preferred way to obtain a database connection. It will reuse an existing connection or get a new one if required.
     * </p>
     * <p>
     * A reserved connection is required by many internal methods that need a database
     * connection. They will acquire it by calling {@link #obtainConnection()}. This helps in reusing the same connection
     * for multiple tasks within a single thread. This also helps to avoid deadlocks where a single thread requests
     * multiple connections from the connection pool in method subcalls, blocking itself.
     * </p>
     * <p>
     * Guaranteed to return either an already reserved connection or a newly reserved one. The newlyReserved property of the returned
     * object indicates whether the returned connection was newly reserved or not ({@code true} or
     * {@code false}, respectively). To comfortably release the connection only when it was newly reserved, use
     * {@link #releaseConnection(CoStoSysConnection)} or simply {@link CoStoSysConnection#release()}.
     * </p>
     *
     * @return A pair consisting of connection and the information if it was newly reserved or not.
     * @see #releaseConnection(CoStoSysConnection)
     */
+ public CoStoSysConnection obtainOrReserveConnection() {
+ LOG.trace("Connection requested, obtained or newly reserved");
+ CoStoSysConnection connection;
+ int reservedConnections = getNumReservedConnections();
+ if (reservedConnections == 0) {
+ connection = reserveConnection();
+ } else {
+ connection = obtainConnection();
+ if (LOG.isTraceEnabled())
+ LOG.trace("There are connections available, obtained {}", connection.getConnection());
+ }
+ return connection;
+ }
+
+ public int getNumReservedConnections() {
+ Thread currentThread = Thread.currentThread();
+ List list;
+ try {
+ list = connectionCache.get(currentThread);
+ } catch (ExecutionException e) {
+ throw new RuntimeException(e);
+ }
+ LOG.trace("Currently, there are {} connections reserved for thread {}", list.size(), Thread.currentThread().getName());
+ if (!list.isEmpty()) {
+ cleanClosedReservedConnections(list, currentThread);
+ LOG.trace("After cleaning, {} connections remain for thread {}", list.size(), Thread.currentThread().getName());
+ }
+ return list.size();
+ }
+
+ /**
+ * Checks the reserved connections whether they have already been closed and removes those from the passed list.
+ *
+ * @param list The list of reserved connections of a thread.
+ */
+ private void cleanClosedReservedConnections(List list, Thread thread) {
+ LOG.trace("Cleaning already closed connections from the list of reserved connections for thread {}", thread.getName());
+ Iterator it = list.iterator();
+ while (it.hasNext()) {
+ CoStoSysConnection conn = it.next();
+ try {
+ if (conn.getConnection().isClosed()) {
+ LOG.trace("Removing connection {} from the list for thread \"{}\" because it is closed.", conn.getConnection(), thread.getName());
+ it.remove();
+ }
+ } catch (SQLException e) {
+ LOG.error("Exception occurred when checking if a connection is closed", e);
+ }
+ }
+ }
+
    /**
     * Only use when you are sure you need this method. Otherwise, use {@link #obtainOrReserveConnection()}.
     * <p>
     * Reserves a connection for the current thread. A reserved connection is required by many internal methods that need a database
     * connection. They will acquire it by calling {@link #obtainConnection()}. This helps in reusing the same connection
     * for multiple tasks within a single thread. This also helps to avoid deadlocks where a single thread requests
     * multiple connections from the connection pool in method subcalls, blocking itself.
     * </p>
     * <p>
     * Note that it is possible to reserve multiple connections but that this does not have any positive effect as of now.
     * You should always only reserve one connection per thread. After the connection is not required any more, call
     * {@link #releaseConnections()} to free the connection.
     * </p>
     *
     * @return The newly reserved connection.
     * @see #obtainConnection()
     * @see #releaseConnections()
     */
+ public CoStoSysConnection reserveConnection() {
+ if (LOG.isTraceEnabled()) {
+ final ConcurrentMap> map = connectionCache.asMap();
+ StringBuilder sb = new StringBuilder("Current connection allocation:").append("\n");
+ for (Thread t : map.keySet()) {
+ sb.append("Thread '").append(t.getName()).append("':\t\t").append(map.get(t).size()).append("\n");
+ }
+ LOG.trace(sb.toString());
+ }
+
+ Thread currentThread = Thread.currentThread();
+ LOG.trace("Trying to reserve a connection for thread \"{}\"", currentThread.getName());
+ List list;
+ try {
+ list = connectionCache.get(currentThread);
+ } catch (ExecutionException e) {
+ throw new RuntimeException(e);
+ }
+ int listSize = list.size();
+ cleanClosedReservedConnections(list, currentThread);
+ if (LOG.isTraceEnabled() && list.size() < listSize) {
+ LOG.trace("The list of connections for thread \"{}\" was shortened from {} to {} due to connections closed in the meantime.", currentThread.getName(), listSize, list.size());
+ }
+ if (list.size() == dbConfig.getMaxConnections())
+ LOG.warn("The current thread \"" + currentThread.getName() + "\" has already reserved " + list.size() + " connections. The connection pool is of size " + dbConfig.getMaxConnections() + ". Cannot reserve another connection. Call releaseConnections() to free reserved connections back to the pool. It will be tried to obtain a connection by waiting for one to get free. This might end in a timeout error.");
+ Connection conn = getConn();
+ CoStoSysConnection costoConn = new CoStoSysConnection(this, conn, true);
+ list.add(costoConn);
+ LOG.trace("Reserving connection {} for thread \"{}\". This thread has now {} connections reserved.", conn, currentThread.getName(), list.size());
+ return costoConn;
+ }
+
+ /**
+ * Releases all connections associated with the current thread back to the connection pool. After this call,
+ * the current thread will not have any reserved connections left.
+ *
+ * @see #obtainOrReserveConnection()
+ * @see #reserveConnection()
+ * @see #obtainConnection()
+ */
+ public void releaseConnections() {
+ Thread currentThread = Thread.currentThread();
+ LOG.trace("Releasing all connections held for Thread \"{}\"", currentThread.getName());
+ List connectionList;
+ try {
+ connectionList = connectionCache.get(currentThread);
+ } catch (ExecutionException e) {
+ throw new RuntimeException(e);
+ }
+ for (CoStoSysConnection conn : connectionList) {
+ try {
+ if (!conn.getConnection().isClosed()) {
+ LOG.trace("Closing connection {}", conn);
+ conn.getConnection().close();
+ }
+ } catch (SQLException e) {
+ LOG.error("Could not release connection back to the pool", e);
+ }
+ }
+ connectionList.clear();
+ }
+
+ /**
+ * Removes the given connection from the list of reserved connection of the calling thread. If the connection
+ * was not reserved by the calling thread, an IllegalArgumentException will be raised. However, it is
+ * also possible to release connections received from another thread by just closing them via {@link Connection#close()}.
+ * This should only be used intentionally, however, to avoid confusion.
+ *
+ * @param conn
+ * @throws IllegalArgumentException If the given connection is not associated with the current thread.
+ */
+ public void releaseConnection(CoStoSysConnection conn) throws SQLException {
+ Thread currentThread = Thread.currentThread();
+ LOG.trace("Releasing connection {} for thread \"{}\"", conn.getConnection(), currentThread.getName());
+ List connectionList;
+ try {
+ connectionList = connectionCache.get(currentThread);
+ } catch (ExecutionException e) {
+ throw new RuntimeException(e);
+ }
+ // Note that this will not remove anything if the connection is closed by a different thread than the originally reserving one.
+ // This shouldn't be an issue, however, since we clean up closed connections regularly.
+ connectionList.remove(conn);
+ conn.getConnection().close();
+ }
+
+
+ // Runs the given query with a connection obtained or reserved for the current thread;
+ // the connection is released automatically via try-with-resources.
+ // WARNING: any Throwable from the query is logged and swallowed, and null is returned —
+ // the primitive-unboxing wrappers below (withConnectionQueryBoolean etc.) will then
+ // throw a NullPointerException on the cast/unboxing.
+ // NOTE(review): the generic parameter on DbcQuery appears garbled by extraction here.
+ public Object withConnectionQuery(DbcQuery> command) {
+ Object ret = null;
+ try (CoStoSysConnection ignored = obtainOrReserveConnection()) {
+ try {
+ ret = command.query(this);
+ } catch (Throwable throwable) {
+ LOG.error("Could not execute query", throwable);
+ }
+ }
+ return ret;
+ }
+
+ // Convenience wrapper unboxing the query result to boolean. Throws a
+ // NullPointerException if the query failed (withConnectionQuery returns null then).
+ public boolean withConnectionQueryBoolean(DbcQuery command) {
+ return (boolean) withConnectionQuery(command);
+ }
+
+ // Convenience wrapper unboxing the query result to int. Throws a
+ // NullPointerException if the query failed (withConnectionQuery returns null then).
+ public int withConnectionQueryInteger(DbcQuery command) {
+ return (int) withConnectionQuery(command);
+ }
+
+ // Convenience wrapper unboxing the query result to double. Throws a
+ // NullPointerException if the query failed (withConnectionQuery returns null then).
+ public double withConnectionQueryDouble(DbcQuery command) {
+ return (double) withConnectionQuery(command);
+ }
+
+ // Convenience wrapper casting the query result to String. Returns null if the
+ // query failed (withConnectionQuery swallows the error and returns null).
+ public String withConnectionQueryString(DbcQuery query) {
+ return (String) withConnectionQuery(query);
+ }
+
+ /**
+  * Executes the given SQL command with a connection obtained or reserved for the
+  * current thread; the connection is released automatically via try-with-resources.
+  * Any Throwable raised by the command is logged and swallowed, mirroring the
+  * best-effort behavior of {@code withConnectionQuery}.
+  *
+  * @param command The SQL execution to perform against this connector.
+  */
+ public void withConnectionExecute(DbcExecution command) {
+ // (Removed dead local "boolean close = false;" — it was never read.)
+ try (CoStoSysConnection ignored = obtainOrReserveConnection()) {
+ try {
+ command.execute(this);
+ } catch (Throwable throwable) {
+ LOG.error("Could not execute SQL", throwable);
+ }
+ }
+ }
+
+ // Aspects of a (subset) processing status that can be requested when querying status
+ // information; presumably corresponds to subset table status columns — confirm usage.
+ public enum StatusElement {HAS_ERRORS, IS_PROCESSED, IN_PROCESS, TOTAL, LAST_COMPONENT}
+
+ /**
+ * A class to parse xml files and make them accessible with an iterator
+ *
+ * @author hellrich
+ */
+ private class XMLPreparer {
+ private final FieldConfig fieldConfig;
+ private File fileOrDir;
+
+ protected XMLPreparer(File fileOrDir, FieldConfig fieldConfig) {
+ this.fileOrDir = fileOrDir;
+ this.fieldConfig = fieldConfig;
+ }
+
+ /**
+ * Parses a xml file according to the FieldConfig for this DatabaseConnector
+ *
+ * @param fileName - file to parse
+ * @return - an iterator, yielding rows for a database
+ */
+ protected Iterator> prepare(String fileName) {
+
+ String xmlFilePath = fileOrDir.getAbsolutePath();
+ if (fileOrDir.isDirectory()) {
+ xmlFilePath = xmlFilePath + "/" + fileName;
+ }
+ File xmlFile = new File(xmlFilePath);
+ boolean hugeFile = false;
+ if (!fileName.endsWith(".zip") && xmlFile.length() >= 1024 * 1024 * 1024) {
+ LOG.info("File is larger than 1GB. Trying VTD huge.");
+ hugeFile = true;
+ }
+ return JulieXMLTools.constructRowIterator(xmlFilePath, BUFFER_SIZE, fieldConfig.getForEachXPath(),
+ fieldConfig.getFields(), hugeFile);
+
+ }
+
+ }
+
+}
diff --git a/src/main/java/de/julielab/xmlData/dataBase/DbcExecution.java b/src/main/java/de/julielab/costosys/dbconnection/DbcExecution.java
similarity index 68%
rename from src/main/java/de/julielab/xmlData/dataBase/DbcExecution.java
rename to src/main/java/de/julielab/costosys/dbconnection/DbcExecution.java
index bff0806..1af7eec 100644
--- a/src/main/java/de/julielab/xmlData/dataBase/DbcExecution.java
+++ b/src/main/java/de/julielab/costosys/dbconnection/DbcExecution.java
@@ -1,4 +1,4 @@
-package de.julielab.xmlData.dataBase;
+package de.julielab.costosys.dbconnection;
public interface DbcExecution {
void execute(DataBaseConnector dbc) throws Throwable;
diff --git a/src/main/java/de/julielab/xmlData/dataBase/DbcQuery.java b/src/main/java/de/julielab/costosys/dbconnection/DbcQuery.java
similarity index 66%
rename from src/main/java/de/julielab/xmlData/dataBase/DbcQuery.java
rename to src/main/java/de/julielab/costosys/dbconnection/DbcQuery.java
index 5a91ddd..e2c1b88 100644
--- a/src/main/java/de/julielab/xmlData/dataBase/DbcQuery.java
+++ b/src/main/java/de/julielab/costosys/dbconnection/DbcQuery.java
@@ -1,4 +1,4 @@
-package de.julielab.xmlData.dataBase;
+package de.julielab.costosys.dbconnection;
public interface DbcQuery {
T query(DataBaseConnector dbc) throws Throwable;
diff --git a/src/main/java/de/julielab/xmlData/dataBase/QueryHelper.java b/src/main/java/de/julielab/costosys/dbconnection/QueryHelper.java
similarity index 92%
rename from src/main/java/de/julielab/xmlData/dataBase/QueryHelper.java
rename to src/main/java/de/julielab/costosys/dbconnection/QueryHelper.java
index 64bd10c..afc50ee 100644
--- a/src/main/java/de/julielab/xmlData/dataBase/QueryHelper.java
+++ b/src/main/java/de/julielab/costosys/dbconnection/QueryHelper.java
@@ -1,4 +1,4 @@
-package de.julielab.xmlData.dataBase;
+package de.julielab.costosys.dbconnection;
import java.sql.Connection;
import java.sql.ResultSet;
diff --git a/src/main/java/de/julielab/xmlData/dataBase/SubsetStatus.java b/src/main/java/de/julielab/costosys/dbconnection/SubsetStatus.java
similarity index 97%
rename from src/main/java/de/julielab/xmlData/dataBase/SubsetStatus.java
rename to src/main/java/de/julielab/costosys/dbconnection/SubsetStatus.java
index 521eba4..f683474 100644
--- a/src/main/java/de/julielab/xmlData/dataBase/SubsetStatus.java
+++ b/src/main/java/de/julielab/costosys/dbconnection/SubsetStatus.java
@@ -1,4 +1,4 @@
-package de.julielab.xmlData.dataBase;
+package de.julielab.costosys.dbconnection;
import java.text.DecimalFormat;
import java.util.Map.Entry;
diff --git a/src/main/java/de/julielab/xmlData/dataBase/ThreadedColumnsIterator.java b/src/main/java/de/julielab/costosys/dbconnection/ThreadedColumnsIterator.java
similarity index 98%
rename from src/main/java/de/julielab/xmlData/dataBase/ThreadedColumnsIterator.java
rename to src/main/java/de/julielab/costosys/dbconnection/ThreadedColumnsIterator.java
index b7fd3c4..2b2cc22 100644
--- a/src/main/java/de/julielab/xmlData/dataBase/ThreadedColumnsIterator.java
+++ b/src/main/java/de/julielab/costosys/dbconnection/ThreadedColumnsIterator.java
@@ -1,15 +1,13 @@
-package de.julielab.xmlData.dataBase;
+package de.julielab.costosys.dbconnection;
+import de.julielab.costosys.configuration.FieldConfig;
import de.julielab.xml.JulieXMLConstants;
import de.julielab.xml.JulieXMLTools;
-import de.julielab.xmlData.config.FieldConfig;
import org.apache.commons.lang3.StringUtils;
-import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
-import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
diff --git a/src/main/java/de/julielab/xmlData/dataBase/ThreadedColumnsToRetrieveIterator.java b/src/main/java/de/julielab/costosys/dbconnection/ThreadedColumnsToRetrieveIterator.java
similarity index 99%
rename from src/main/java/de/julielab/xmlData/dataBase/ThreadedColumnsToRetrieveIterator.java
rename to src/main/java/de/julielab/costosys/dbconnection/ThreadedColumnsToRetrieveIterator.java
index 7f2df26..6bf8646 100644
--- a/src/main/java/de/julielab/xmlData/dataBase/ThreadedColumnsToRetrieveIterator.java
+++ b/src/main/java/de/julielab/costosys/dbconnection/ThreadedColumnsToRetrieveIterator.java
@@ -1,8 +1,8 @@
-package de.julielab.xmlData.dataBase;
+package de.julielab.costosys.dbconnection;
+import de.julielab.costosys.configuration.FieldConfig;
import de.julielab.xml.JulieXMLConstants;
import de.julielab.xml.JulieXMLTools;
-import de.julielab.xmlData.config.FieldConfig;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
@@ -90,7 +90,7 @@ public ThreadedColumnsToRetrieveIterator(DataBaseConnector dbc, List i
/*
* (non-Javadoc)
*
- * @see de.julielab.xmlData.dataBase.DBCThreadedIterator#destroy()
+ * @see DBCThreadedIterator#destroy()
*/
@Override
public void close() {
diff --git a/src/main/java/de/julielab/xmlData/dataBase/util/CoStoSysException.java b/src/main/java/de/julielab/costosys/dbconnection/util/CoStoSysException.java
similarity index 91%
rename from src/main/java/de/julielab/xmlData/dataBase/util/CoStoSysException.java
rename to src/main/java/de/julielab/costosys/dbconnection/util/CoStoSysException.java
index a5cf3ba..0112e9b 100644
--- a/src/main/java/de/julielab/xmlData/dataBase/util/CoStoSysException.java
+++ b/src/main/java/de/julielab/costosys/dbconnection/util/CoStoSysException.java
@@ -1,4 +1,4 @@
-package de.julielab.xmlData.dataBase.util;
+package de.julielab.costosys.dbconnection.util;
public class CoStoSysException extends Exception {
public CoStoSysException() {
diff --git a/src/main/java/de/julielab/xmlData/dataBase/util/CoStoSysRuntimeException.java b/src/main/java/de/julielab/costosys/dbconnection/util/CoStoSysRuntimeException.java
similarity index 92%
rename from src/main/java/de/julielab/xmlData/dataBase/util/CoStoSysRuntimeException.java
rename to src/main/java/de/julielab/costosys/dbconnection/util/CoStoSysRuntimeException.java
index 1135d3a..1cc102f 100644
--- a/src/main/java/de/julielab/xmlData/dataBase/util/CoStoSysRuntimeException.java
+++ b/src/main/java/de/julielab/costosys/dbconnection/util/CoStoSysRuntimeException.java
@@ -1,4 +1,4 @@
-package de.julielab.xmlData.dataBase.util;
+package de.julielab.costosys.dbconnection.util;
public class CoStoSysRuntimeException extends RuntimeException {
public CoStoSysRuntimeException() {
diff --git a/src/main/java/de/julielab/xmlData/dataBase/util/CoStoSysSQLRuntimeException.java b/src/main/java/de/julielab/costosys/dbconnection/util/CoStoSysSQLRuntimeException.java
similarity index 92%
rename from src/main/java/de/julielab/xmlData/dataBase/util/CoStoSysSQLRuntimeException.java
rename to src/main/java/de/julielab/costosys/dbconnection/util/CoStoSysSQLRuntimeException.java
index 8dcf561..a9c05c0 100644
--- a/src/main/java/de/julielab/xmlData/dataBase/util/CoStoSysSQLRuntimeException.java
+++ b/src/main/java/de/julielab/costosys/dbconnection/util/CoStoSysSQLRuntimeException.java
@@ -1,4 +1,4 @@
-package de.julielab.xmlData.dataBase.util;
+package de.julielab.costosys.dbconnection.util;
public class CoStoSysSQLRuntimeException extends CoStoSysRuntimeException {
public CoStoSysSQLRuntimeException() {
diff --git a/src/main/java/de/julielab/xmlData/dataBase/util/NoReservedConnectionException.java b/src/main/java/de/julielab/costosys/dbconnection/util/NoReservedConnectionException.java
similarity index 92%
rename from src/main/java/de/julielab/xmlData/dataBase/util/NoReservedConnectionException.java
rename to src/main/java/de/julielab/costosys/dbconnection/util/NoReservedConnectionException.java
index 5349150..f4e3dce 100644
--- a/src/main/java/de/julielab/xmlData/dataBase/util/NoReservedConnectionException.java
+++ b/src/main/java/de/julielab/costosys/dbconnection/util/NoReservedConnectionException.java
@@ -1,4 +1,4 @@
-package de.julielab.xmlData.dataBase.util;
+package de.julielab.costosys.dbconnection.util;
public class NoReservedConnectionException extends CoStoSysRuntimeException {
public NoReservedConnectionException() {
diff --git a/src/main/java/de/julielab/xmlData/dataBase/util/TableSchemaMismatchException.java b/src/main/java/de/julielab/costosys/dbconnection/util/TableSchemaMismatchException.java
similarity index 92%
rename from src/main/java/de/julielab/xmlData/dataBase/util/TableSchemaMismatchException.java
rename to src/main/java/de/julielab/costosys/dbconnection/util/TableSchemaMismatchException.java
index d83cb7b..4e45d5f 100644
--- a/src/main/java/de/julielab/xmlData/dataBase/util/TableSchemaMismatchException.java
+++ b/src/main/java/de/julielab/costosys/dbconnection/util/TableSchemaMismatchException.java
@@ -1,4 +1,4 @@
-package de.julielab.xmlData.dataBase.util;
+package de.julielab.costosys.dbconnection.util;
public class TableSchemaMismatchException extends CoStoSysException {
public TableSchemaMismatchException() {
diff --git a/src/main/java/de/julielab/xmlData/dataBase/util/UnobtainableConnectionException.java b/src/main/java/de/julielab/costosys/dbconnection/util/UnobtainableConnectionException.java
similarity index 93%
rename from src/main/java/de/julielab/xmlData/dataBase/util/UnobtainableConnectionException.java
rename to src/main/java/de/julielab/costosys/dbconnection/util/UnobtainableConnectionException.java
index e382758..1f16089 100644
--- a/src/main/java/de/julielab/xmlData/dataBase/util/UnobtainableConnectionException.java
+++ b/src/main/java/de/julielab/costosys/dbconnection/util/UnobtainableConnectionException.java
@@ -1,4 +1,4 @@
-package de.julielab.xmlData.dataBase.util;
+package de.julielab.costosys.dbconnection.util;
public class UnobtainableConnectionException extends CoStoSysRuntimeException {
public UnobtainableConnectionException() {
diff --git a/src/main/java/de/julielab/medline/ConfigurationConstants.java b/src/main/java/de/julielab/costosys/medline/ConfigurationConstants.java
similarity index 90%
rename from src/main/java/de/julielab/medline/ConfigurationConstants.java
rename to src/main/java/de/julielab/costosys/medline/ConfigurationConstants.java
index af4cd8c..9af37ff 100644
--- a/src/main/java/de/julielab/medline/ConfigurationConstants.java
+++ b/src/main/java/de/julielab/costosys/medline/ConfigurationConstants.java
@@ -1,4 +1,4 @@
-package de.julielab.medline;
+package de.julielab.costosys.medline;
public class ConfigurationConstants {
public static final String INSERTION_INPUT = "insertion.directory";
diff --git a/src/main/java/de/julielab/medline/DBCMedlineUtilities.java b/src/main/java/de/julielab/costosys/medline/DBCMedlineUtilities.java
similarity index 94%
rename from src/main/java/de/julielab/medline/DBCMedlineUtilities.java
rename to src/main/java/de/julielab/costosys/medline/DBCMedlineUtilities.java
index e8e2754..a3af351 100644
--- a/src/main/java/de/julielab/medline/DBCMedlineUtilities.java
+++ b/src/main/java/de/julielab/costosys/medline/DBCMedlineUtilities.java
@@ -1,4 +1,4 @@
-package de.julielab.medline;
+package de.julielab.costosys.medline;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
diff --git a/src/main/java/de/julielab/medline/ElasticSearchDocumentDeleter.java b/src/main/java/de/julielab/costosys/medline/ElasticSearchDocumentDeleter.java
similarity index 99%
rename from src/main/java/de/julielab/medline/ElasticSearchDocumentDeleter.java
rename to src/main/java/de/julielab/costosys/medline/ElasticSearchDocumentDeleter.java
index 2792005..4ee685f 100644
--- a/src/main/java/de/julielab/medline/ElasticSearchDocumentDeleter.java
+++ b/src/main/java/de/julielab/costosys/medline/ElasticSearchDocumentDeleter.java
@@ -1,4 +1,4 @@
-package de.julielab.medline;
+package de.julielab.costosys.medline;
import org.apache.commons.configuration2.HierarchicalConfiguration;
import org.apache.commons.configuration2.tree.ImmutableNode;
diff --git a/src/main/java/de/julielab/medline/IDocumentDeleter.java b/src/main/java/de/julielab/costosys/medline/IDocumentDeleter.java
similarity index 94%
rename from src/main/java/de/julielab/medline/IDocumentDeleter.java
rename to src/main/java/de/julielab/costosys/medline/IDocumentDeleter.java
index 52048d4..3f3d31e 100644
--- a/src/main/java/de/julielab/medline/IDocumentDeleter.java
+++ b/src/main/java/de/julielab/costosys/medline/IDocumentDeleter.java
@@ -1,4 +1,4 @@
-package de.julielab.medline;
+package de.julielab.costosys.medline;
import org.apache.commons.configuration2.HierarchicalConfiguration;
import org.apache.commons.configuration2.tree.ImmutableNode;
diff --git a/src/main/java/de/julielab/medline/MedlineDataTableDocumentDeleter.java b/src/main/java/de/julielab/costosys/medline/MedlineDataTableDocumentDeleter.java
similarity index 89%
rename from src/main/java/de/julielab/medline/MedlineDataTableDocumentDeleter.java
rename to src/main/java/de/julielab/costosys/medline/MedlineDataTableDocumentDeleter.java
index 064aba7..1e3e071 100644
--- a/src/main/java/de/julielab/medline/MedlineDataTableDocumentDeleter.java
+++ b/src/main/java/de/julielab/costosys/medline/MedlineDataTableDocumentDeleter.java
@@ -1,7 +1,7 @@
-package de.julielab.medline;
+package de.julielab.costosys.medline;
-import de.julielab.xmlData.Constants;
-import de.julielab.xmlData.dataBase.DataBaseConnector;
+import de.julielab.costosys.Constants;
+import de.julielab.costosys.dbconnection.DataBaseConnector;
import org.apache.commons.configuration2.HierarchicalConfiguration;
import org.apache.commons.configuration2.tree.ImmutableNode;
import org.slf4j.Logger;
diff --git a/src/main/java/de/julielab/medline/MedlineDocumentDeletionException.java b/src/main/java/de/julielab/costosys/medline/MedlineDocumentDeletionException.java
similarity index 94%
rename from src/main/java/de/julielab/medline/MedlineDocumentDeletionException.java
rename to src/main/java/de/julielab/costosys/medline/MedlineDocumentDeletionException.java
index a3d1504..6f1c90d 100644
--- a/src/main/java/de/julielab/medline/MedlineDocumentDeletionException.java
+++ b/src/main/java/de/julielab/costosys/medline/MedlineDocumentDeletionException.java
@@ -1,4 +1,4 @@
-package de.julielab.medline;
+package de.julielab.costosys.medline;
public class MedlineDocumentDeletionException extends MedlineUpdateException {
public MedlineDocumentDeletionException() {
diff --git a/src/main/java/de/julielab/medline/MedlineUpdateException.java b/src/main/java/de/julielab/costosys/medline/MedlineUpdateException.java
similarity index 93%
rename from src/main/java/de/julielab/medline/MedlineUpdateException.java
rename to src/main/java/de/julielab/costosys/medline/MedlineUpdateException.java
index c08f255..8026d5d 100644
--- a/src/main/java/de/julielab/medline/MedlineUpdateException.java
+++ b/src/main/java/de/julielab/costosys/medline/MedlineUpdateException.java
@@ -1,4 +1,4 @@
-package de.julielab.medline;
+package de.julielab.costosys.medline;
public class MedlineUpdateException extends Exception {
public MedlineUpdateException() {
diff --git a/src/main/java/de/julielab/medline/Updater.java b/src/main/java/de/julielab/costosys/medline/Updater.java
similarity index 98%
rename from src/main/java/de/julielab/medline/Updater.java
rename to src/main/java/de/julielab/costosys/medline/Updater.java
index 80af840..1df31cf 100644
--- a/src/main/java/de/julielab/medline/Updater.java
+++ b/src/main/java/de/julielab/costosys/medline/Updater.java
@@ -1,10 +1,10 @@
-package de.julielab.medline;
+package de.julielab.costosys.medline;
import de.julielab.xml.JulieXMLConstants;
import de.julielab.xml.JulieXMLTools;
-import de.julielab.xmlData.Constants;
-import de.julielab.xmlData.dataBase.CoStoSysConnection;
-import de.julielab.xmlData.dataBase.DataBaseConnector;
+import de.julielab.costosys.Constants;
+import de.julielab.costosys.dbconnection.CoStoSysConnection;
+import de.julielab.costosys.dbconnection.DataBaseConnector;
import org.apache.commons.configuration2.HierarchicalConfiguration;
import org.apache.commons.configuration2.tree.ImmutableNode;
import org.slf4j.Logger;
diff --git a/src/main/resources/defaultConfiguration.xml b/src/main/resources/defaultConfiguration.xml
index f168709..10e0941 100644
--- a/src/main/resources/defaultConfiguration.xml
+++ b/src/main/resources/defaultConfiguration.xml
@@ -38,22 +38,6 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
@@ -61,16 +45,6 @@
-
-
-
-
-
-
-
-
diff --git a/src/test/java/de/julielab/xmlData/cli/CLITest.java b/src/test/java/de/julielab/costosys/cli/CLITest.java
similarity index 62%
rename from src/test/java/de/julielab/xmlData/cli/CLITest.java
rename to src/test/java/de/julielab/costosys/cli/CLITest.java
index 171f877..ab01a3e 100644
--- a/src/test/java/de/julielab/xmlData/cli/CLITest.java
+++ b/src/test/java/de/julielab/costosys/cli/CLITest.java
@@ -1,17 +1,16 @@
-package de.julielab.xmlData.cli;
+package de.julielab.costosys.cli;
+import de.julielab.costosys.cli.CLI;
import de.julielab.jcore.db.test.DBTestUtils;
-import de.julielab.xmlData.Constants;
-import de.julielab.xmlData.dataBase.DataBaseConnector;
-import de.julielab.xmlData.dataBase.util.TableSchemaMismatchException;
+import de.julielab.costosys.Constants;
+import de.julielab.costosys.dbconnection.DataBaseConnector;
import org.apache.commons.configuration2.ex.ConfigurationException;
import org.testcontainers.containers.PostgreSQLContainer;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
-import java.sql.SQLException;
import static org.assertj.core.api.Assertions.*;
public class CLITest {
@@ -38,7 +37,7 @@ public static void shutdown(){
@Test
public void testImport() throws Exception {
- assertThatCode(() -> CLI.main(new String[]{"-i", "src/test/resources/pubmedsample18n0001.xml.gz"})).doesNotThrowAnyException();
+ assertThatCode(() -> de.julielab.costosys.cli.CLI.main(new String[]{"-i", "src/test/resources/pubmedsample18n0001.xml.gz"})).doesNotThrowAnyException();
dbc.reserveConnection();
assertThat(dbc.tableExists(Constants.DEFAULT_DATA_TABLE_NAME));
assertThat(dbc.getNumRows(Constants.DEFAULT_DATA_TABLE_NAME)).isEqualTo(177);
@@ -46,15 +45,15 @@ public void testImport() throws Exception {
@Test(dependsOnMethods = "testImport")
public void testCreateSubset() {
- assertThatCode(() -> CLI.main(new String[]{"-s", "all_subset", "-a"})).doesNotThrowAnyException();
- assertThatCode(() -> CLI.main(new String[]{"-s", "random_subset", "-r", "10"})).doesNotThrowAnyException();
- assertThatCode(() -> CLI.main(new String[]{"-s", "mirror_subset", "-m"})).doesNotThrowAnyException();
+ assertThatCode(() -> de.julielab.costosys.cli.CLI.main(new String[]{"-s", "all_subset", "-a"})).doesNotThrowAnyException();
+ assertThatCode(() -> de.julielab.costosys.cli.CLI.main(new String[]{"-s", "random_subset", "-r", "10"})).doesNotThrowAnyException();
+ assertThatCode(() -> de.julielab.costosys.cli.CLI.main(new String[]{"-s", "mirror_subset", "-m"})).doesNotThrowAnyException();
}
@Test(dependsOnMethods = "testCreateSubset")
public void testStatus() {
- assertThatCode(() -> CLI.main(new String[]{"-st", "all_subset"})).doesNotThrowAnyException();
- assertThatCode(() -> CLI.main(new String[]{"-st", "random_subset"})).doesNotThrowAnyException();
+ assertThatCode(() -> de.julielab.costosys.cli.CLI.main(new String[]{"-st", "all_subset"})).doesNotThrowAnyException();
+ assertThatCode(() -> de.julielab.costosys.cli.CLI.main(new String[]{"-st", "random_subset"})).doesNotThrowAnyException();
assertThatCode(() -> CLI.main(new String[]{"-st", "mirror_subset"})).doesNotThrowAnyException();
}
}
diff --git a/src/test/java/de/julielab/xmlData/config/ConfigReaderTest.java b/src/test/java/de/julielab/costosys/configuration/ConfigReaderTest.java
similarity index 87%
rename from src/test/java/de/julielab/xmlData/config/ConfigReaderTest.java
rename to src/test/java/de/julielab/costosys/configuration/ConfigReaderTest.java
index b91c50f..8a0deff 100644
--- a/src/test/java/de/julielab/xmlData/config/ConfigReaderTest.java
+++ b/src/test/java/de/julielab/costosys/configuration/ConfigReaderTest.java
@@ -13,7 +13,7 @@
* Creation date: 06.04.2011
**/
-package de.julielab.xmlData.config;
+package de.julielab.costosys.configuration;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@@ -25,8 +25,6 @@
import java.util.List;
import java.util.Map;
-import de.julielab.xmlData.Constants;
-import de.julielab.xmlData.dataBase.DataBaseConnector;
import org.apache.commons.io.IOUtils;
import org.junit.Test;
@@ -68,7 +66,7 @@ public void testMergeConfigDataWithAll() throws SecurityException,
userConf = IOUtils.toByteArray(is);
// Merge default and user configuration.
- Method mergeConfigData = ConfigReader.class.getDeclaredMethod(
+ Method mergeConfigData = de.julielab.costosys.configuration.ConfigReader.class.getDeclaredMethod(
"mergeConfigData", byte[].class, byte[].class);
mergeConfigData.setAccessible(true);
mergedConf = (byte[]) mergeConfigData.invoke(null, defaultConf,
@@ -94,7 +92,7 @@ public void testMergeConfigDataWithoutSchema() throws SecurityException,
byte[] mergedConf;
byte[] mergedConfCorrect;
- Method mergeConfigData = ConfigReader.class.getDeclaredMethod(
+ Method mergeConfigData = de.julielab.costosys.configuration.ConfigReader.class.getDeclaredMethod(
"mergeConfigData", byte[].class, byte[].class);
mergeConfigData.setAccessible(true);
is = ConfigReaderTest.class
@@ -106,7 +104,7 @@ public void testMergeConfigDataWithoutSchema() throws SecurityException,
userConf = IOUtils.toByteArray(is);
// Merge default and user configuration.
- mergedConf = (byte[]) mergeConfigData.invoke(new ConfigReader(null),
+ mergedConf = (byte[]) mergeConfigData.invoke(new de.julielab.costosys.configuration.ConfigReader(null),
defaultConf, userConf);
// Check whether the result matches the correct version.
@@ -129,7 +127,7 @@ public void testMergeConfigDataWithoutDB() throws SecurityException,
byte[] mergedConf;
byte[] mergedConfCorrect;
- Method mergeConfigData = ConfigReader.class.getDeclaredMethod(
+ Method mergeConfigData = de.julielab.costosys.configuration.ConfigReader.class.getDeclaredMethod(
"mergeConfigData", byte[].class, byte[].class);
mergeConfigData.setAccessible(true);
is = ConfigReaderTest.class
@@ -141,7 +139,7 @@ public void testMergeConfigDataWithoutDB() throws SecurityException,
userConf = IOUtils.toByteArray(is);
// Merge default and user configuration.
- mergedConf = (byte[]) mergeConfigData.invoke(new ConfigReader(null),
+ mergedConf = (byte[]) mergeConfigData.invoke(new de.julielab.costosys.configuration.ConfigReader(null),
defaultConf, userConf);
// Check whether the result matches the correct version.
@@ -158,7 +156,7 @@ public void dbConfigTest() throws VTDException, IOException {
// Just read in any configuration defining a database connection.
is = ConfigReaderTest.class
.getResourceAsStream("/configuration/confWithAll.xml");
- DBConfig dbconf = new DBConfig(IOUtils.toByteArray(is));
+ de.julielab.costosys.configuration.DBConfig dbconf = new DBConfig(IOUtils.toByteArray(is));
assertEquals("jdbc:postgresql://aserver.net/aDB", dbconf.getUrl());
assertEquals("anotherschema", dbconf.getActiveDataPGSchema());
}
@@ -171,8 +169,8 @@ public void fieldConfigTest() throws VTDException, IOException {
.getResourceAsStream("/configuration/confWithAll.xml");
byte[] config = IOUtils.toByteArray(is);
String activeSchemaName = ConfigBase.getActiveConfig(config,
- ConfigReader.XPATH_ACTIVE_TABLE_SCHEMA);
- FieldConfig fc = new FieldConfig(config, activeSchemaName);
+ de.julielab.costosys.configuration.ConfigReader.XPATH_ACTIVE_TABLE_SCHEMA);
+ de.julielab.costosys.configuration.FieldConfig fc = new FieldConfig(config, activeSchemaName);
List> fields = fc.getFields();
Map field = fields.get(0);
@@ -206,10 +204,10 @@ public void fieldConfigTest() throws VTDException, IOException {
public void configReaderTest() {
InputStream is = null;
@SuppressWarnings("unused")
- ConfigReader cr = null;
+ de.julielab.costosys.configuration.ConfigReader cr = null;
// It is valid not to deliver a user configuration at all. The default
// should be used. This shouldn't raise any error.
- cr = new ConfigReader(is);
+ cr = new de.julielab.costosys.configuration.ConfigReader(is);
// Now load a quite normal schema.
is = ConfigReaderTest.class
@@ -218,20 +216,20 @@ public void configReaderTest() {
assertTrue(is != null);
// Now check whether the merging of configurations without errors.
- cr = new ConfigReader(is);
+ cr = new de.julielab.costosys.configuration.ConfigReader(is);
// Repeat with different kinds of configurations.
is = ConfigReaderTest.class
.getResourceAsStream("/configuration/confWithoutSchema.xml");
assertTrue(is != null);
- cr = new ConfigReader(is);
+ cr = new de.julielab.costosys.configuration.ConfigReader(is);
// ----------------------------
is = ConfigReaderTest.class
.getResourceAsStream("/configuration/confWithoutDB.xml");
assertTrue(is != null);
- cr = new ConfigReader(is);
+ cr = new de.julielab.costosys.configuration.ConfigReader(is);
}
@Test
@@ -242,7 +240,7 @@ public void testGetAllSchemaNames() throws SecurityException,
// ConfigReader.
InputStream is = ConfigReaderTest.class
.getResourceAsStream("/configuration/confWithAll.xml");
- ConfigReader cr = new ConfigReader(is);
+ de.julielab.costosys.configuration.ConfigReader cr = new ConfigReader(is);
Method getSchemaNamesMethod = cr.getClass().getDeclaredMethod(
"getAllSchemaNames", byte[].class);
diff --git a/src/test/java/de/julielab/xmlData/config/FieldConfigTest.java b/src/test/java/de/julielab/costosys/configuration/FieldConfigTest.java
similarity index 83%
rename from src/test/java/de/julielab/xmlData/config/FieldConfigTest.java
rename to src/test/java/de/julielab/costosys/configuration/FieldConfigTest.java
index 02e8234..ba96233 100644
--- a/src/test/java/de/julielab/xmlData/config/FieldConfigTest.java
+++ b/src/test/java/de/julielab/costosys/configuration/FieldConfigTest.java
@@ -16,8 +16,8 @@
/**
*
*/
-package de.julielab.xmlData.config;
-import static de.julielab.xmlData.config.FieldConfig.createField;
+package de.julielab.costosys.configuration;
+import static de.julielab.costosys.configuration.FieldConfig.createField;
import static org.assertj.core.api.Assertions.*;
import static org.junit.Assert.assertEquals;
@@ -48,7 +48,7 @@ public class FieldConfigTest {
public void testBuildFields() throws SecurityException, NoSuchMethodException, FileNotFoundException, IOException, VTDException {
byte[] configData = IOUtils.toByteArray(new FileInputStream("src/test/resources/configuration/confWithAll.xml"));
// Test schema without explicit field closing tags.
- FieldConfig fieldConfig = new FieldConfig(configData, "userTableSchema1");
+ de.julielab.costosys.configuration.FieldConfig fieldConfig = new de.julielab.costosys.configuration.FieldConfig(configData, "userTableSchema1");
List> fields = fieldConfig.getFields();
Map field1 = fields.get(0);
Map field2 = fields.get(1);
@@ -69,7 +69,7 @@ public void testBuildFields() throws SecurityException, NoSuchMethodException, F
assertEquals("true", field2.get(JulieXMLConstants.RETRIEVE));
// Test schema with explicit field closing tags.
- fieldConfig = new FieldConfig(configData, "userTableSchema2");
+ fieldConfig = new de.julielab.costosys.configuration.FieldConfig(configData, "userTableSchema2");
fields = fieldConfig.getFields();
field1 = fields.get(0);
field2 = fields.get(1);
@@ -99,11 +99,11 @@ public void testIncompleteProgramaticallyDefinedFieldConfig() {
field.put(JulieXMLConstants.NAME, "field1");
fields.add(field);
// The type property is missing
- assertThatThrownBy(() -> new FieldConfig(fields, "", "testschema")).hasMessageContaining("required \"" + JulieXMLConstants.TYPE + "\" property");
+ assertThatThrownBy(() -> new de.julielab.costosys.configuration.FieldConfig(fields, "", "testschema")).hasMessageContaining("required \"" + JulieXMLConstants.TYPE + "\" property");
field.remove(JulieXMLConstants.NAME);
field.put(JulieXMLConstants.TYPE, "type1");
// Now the name property is missing
- assertThatThrownBy(() -> new FieldConfig(fields, "", "testschema")).hasMessageContaining("required \"" + JulieXMLConstants.NAME + "\" property");
+ assertThatThrownBy(() -> new de.julielab.costosys.configuration.FieldConfig(fields, "", "testschema")).hasMessageContaining("required \"" + JulieXMLConstants.NAME + "\" property");
}
@Test
@@ -113,7 +113,7 @@ public void testProgrammaticallyDefinedFieldConfig() {
fields.add(createField(JulieXMLConstants.NAME, "field1",JulieXMLConstants.TYPE, "type1", JulieXMLConstants.PRIMARY_KEY, "true"));
fields.add(createField(JulieXMLConstants.NAME, "field2", JulieXMLConstants.TYPE, "type2", JulieXMLConstants.PRIMARY_KEY, "true"));
fields.add(createField(JulieXMLConstants.NAME, "field3",JulieXMLConstants.TYPE, "type3", JulieXMLConstants.RETRIEVE, "true"));
- FieldConfig config = new FieldConfig(fields, "foreach", "testschema");
+ de.julielab.costosys.configuration.FieldConfig config = new FieldConfig(fields, "foreach", "testschema");
assertThat(config.getPrimaryKeyString()).isEqualToIgnoringWhitespace("field1,field2");
assertThat(config.getColumnsToRetrieve()).isEqualTo(new String[]{"field3"});
assertThat(config.getForEachXPath()).isEqualTo("foreach");
diff --git a/src/test/java/de/julielab/xmlData/dataBase/DataBaseConnectorTest.java b/src/test/java/de/julielab/costosys/dbconnection/DataBaseConnectorTest.java
similarity index 90%
rename from src/test/java/de/julielab/xmlData/dataBase/DataBaseConnectorTest.java
rename to src/test/java/de/julielab/costosys/dbconnection/DataBaseConnectorTest.java
index 3be86c9..462815e 100644
--- a/src/test/java/de/julielab/xmlData/dataBase/DataBaseConnectorTest.java
+++ b/src/test/java/de/julielab/costosys/dbconnection/DataBaseConnectorTest.java
@@ -1,8 +1,8 @@
-package de.julielab.xmlData.dataBase;
+package de.julielab.costosys.dbconnection;
-import de.julielab.xmlData.Constants;
-import de.julielab.xmlData.cli.TableNotFoundException;
-import de.julielab.xmlData.dataBase.util.TableSchemaMismatchException;
+import de.julielab.costosys.Constants;
+import de.julielab.costosys.cli.TableNotFoundException;
+import de.julielab.costosys.dbconnection.util.TableSchemaMismatchException;
import org.testcontainers.containers.PostgreSQLContainer;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
@@ -20,13 +20,13 @@
public class DataBaseConnectorTest {
public static PostgreSQLContainer postgres;
- private static DataBaseConnector dbc;
+ private static de.julielab.costosys.dbconnection.DataBaseConnector dbc;
@BeforeClass
public static void setup() {
postgres = new PostgreSQLContainer();
postgres.start();
- dbc = new DataBaseConnector(postgres.getJdbcUrl(), postgres.getUsername(), postgres.getPassword());
+ dbc = new de.julielab.costosys.dbconnection.DataBaseConnector(postgres.getJdbcUrl(), postgres.getUsername(), postgres.getPassword());
dbc.setActiveTableSchema("medline_2016");
}
@@ -104,7 +104,7 @@ public void testQuerySubset() throws SQLException {
dbc.initSubset("querysubset", Constants.DEFAULT_DATA_TABLE_NAME);
assertThat(dbc.getNumRows("querysubset")).isGreaterThan(0);
dbc.releaseConnections();
- DBCIterator<byte[][]> it = dbc.querySubset("querysubset", 0);
+ de.julielab.costosys.dbconnection.DBCIterator<byte[][]> it = dbc.querySubset("querysubset", 0);
Set<String> retrieved = new HashSet<>();
while (it.hasNext()) {
byte[][] next = it.next();
diff --git a/src/test/java/de/julielab/xmlData/dataBase/ThreadedColumnsIteratorTest.java b/src/test/java/de/julielab/costosys/dbconnection/ThreadedColumnsIteratorTest.java
similarity index 73%
rename from src/test/java/de/julielab/xmlData/dataBase/ThreadedColumnsIteratorTest.java
rename to src/test/java/de/julielab/costosys/dbconnection/ThreadedColumnsIteratorTest.java
index d25522a..1189eba 100644
--- a/src/test/java/de/julielab/xmlData/dataBase/ThreadedColumnsIteratorTest.java
+++ b/src/test/java/de/julielab/costosys/dbconnection/ThreadedColumnsIteratorTest.java
@@ -1,6 +1,6 @@
-package de.julielab.xmlData.dataBase;
+package de.julielab.costosys.dbconnection;
-import de.julielab.xmlData.Constants;
+import de.julielab.costosys.Constants;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
@@ -9,7 +9,6 @@
import org.testcontainers.containers.PostgreSQLContainer;
import java.io.IOException;
-import java.sql.Connection;
import java.sql.SQLException;
import java.util.Arrays;
@@ -19,7 +18,7 @@ public class ThreadedColumnsIteratorTest {
private final static Logger log = LoggerFactory.getLogger(ThreadedColumnsIteratorTest.class);
@ClassRule
public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer();
- private static DataBaseConnector dbc;
+ private static de.julielab.costosys.dbconnection.DataBaseConnector dbc;
@BeforeClass
public static void setup() throws SQLException, IOException {
@@ -36,7 +35,7 @@ public static void setup() throws SQLException, IOException {
@Test
public void testIterator() throws SQLException {
try (CoStoSysConnection conn = dbc.reserveConnection()) {
- ThreadedColumnsIterator it = new ThreadedColumnsIterator(dbc, conn, Arrays.asList("pmid", "xml"), Constants.DEFAULT_DATA_TABLE_NAME);
+ de.julielab.costosys.dbconnection.ThreadedColumnsIterator it = new de.julielab.costosys.dbconnection.ThreadedColumnsIterator(dbc, conn, Arrays.asList("pmid", "xml"), Constants.DEFAULT_DATA_TABLE_NAME);
int numRetrieved = 0;
while (it.hasNext()) {
Object[] next = it.next();
@@ -50,7 +49,7 @@ public void testIterator() throws SQLException {
@Test
public void testIteratorWithoutExternalConnection() throws InterruptedException {
// Repeat the very same lines of code a few times to make sure that connections are released properly
- ThreadedColumnsIterator it = new ThreadedColumnsIterator(dbc, Arrays.asList("pmid", "xml"), Constants.DEFAULT_DATA_TABLE_NAME);
+ de.julielab.costosys.dbconnection.ThreadedColumnsIterator it = new de.julielab.costosys.dbconnection.ThreadedColumnsIterator(dbc, Arrays.asList("pmid", "xml"), Constants.DEFAULT_DATA_TABLE_NAME);
int numRetrieved = 0;
while (it.hasNext()) {
Object[] next = it.next();
@@ -59,7 +58,7 @@ public void testIteratorWithoutExternalConnection() throws InterruptedException
}
assertEquals(10, numRetrieved);
- it = new ThreadedColumnsIterator(dbc, Arrays.asList("pmid", "xml"), Constants.DEFAULT_DATA_TABLE_NAME);
+ it = new de.julielab.costosys.dbconnection.ThreadedColumnsIterator(dbc, Arrays.asList("pmid", "xml"), Constants.DEFAULT_DATA_TABLE_NAME);
numRetrieved = 0;
while (it.hasNext()) {
Object[] next = it.next();
@@ -68,7 +67,7 @@ public void testIteratorWithoutExternalConnection() throws InterruptedException
}
assertEquals(10, numRetrieved);
- it = new ThreadedColumnsIterator(dbc, Arrays.asList("pmid", "xml"), Constants.DEFAULT_DATA_TABLE_NAME);
+ it = new de.julielab.costosys.dbconnection.ThreadedColumnsIterator(dbc, Arrays.asList("pmid", "xml"), Constants.DEFAULT_DATA_TABLE_NAME);
numRetrieved = 0;
while (it.hasNext()) {
Object[] next = it.next();
@@ -83,7 +82,7 @@ public void testIteratorWithoutExternalConnection() throws InterruptedException
@Test
public void testIteratorWithLimit() throws SQLException {
try (CoStoSysConnection conn = dbc.reserveConnection()) {
- ThreadedColumnsIterator it = new ThreadedColumnsIterator(dbc, conn, Arrays.asList("pmid", "xml"), Constants.DEFAULT_DATA_TABLE_NAME, 2);
+ de.julielab.costosys.dbconnection.ThreadedColumnsIterator it = new ThreadedColumnsIterator(dbc, conn, Arrays.asList("pmid", "xml"), Constants.DEFAULT_DATA_TABLE_NAME, 2);
int numRetrieved = 0;
while (it.hasNext()) {
Object[] next = it.next();
diff --git a/src/test/java/de/julielab/xmlData/dataBase/ThreadedColumnsToRetrieveIteratorTest.java b/src/test/java/de/julielab/costosys/dbconnection/ThreadedColumnsToRetrieveIteratorTest.java
similarity index 74%
rename from src/test/java/de/julielab/xmlData/dataBase/ThreadedColumnsToRetrieveIteratorTest.java
rename to src/test/java/de/julielab/costosys/dbconnection/ThreadedColumnsToRetrieveIteratorTest.java
index a89acaa..e31e7f6 100644
--- a/src/test/java/de/julielab/xmlData/dataBase/ThreadedColumnsToRetrieveIteratorTest.java
+++ b/src/test/java/de/julielab/costosys/dbconnection/ThreadedColumnsToRetrieveIteratorTest.java
@@ -1,6 +1,6 @@
-package de.julielab.xmlData.dataBase;
+package de.julielab.costosys.dbconnection;
-import de.julielab.xmlData.Constants;
+import de.julielab.costosys.Constants;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
@@ -9,10 +9,8 @@
import org.testcontainers.containers.PostgreSQLContainer;
import java.io.IOException;
-import java.sql.Connection;
import java.sql.SQLException;
import java.util.Arrays;
-import java.util.List;
import static org.junit.Assert.assertEquals;
@@ -20,7 +18,7 @@ public class ThreadedColumnsToRetrieveIteratorTest {
private final static Logger log = LoggerFactory.getLogger(ThreadedColumnsToRetrieveIteratorTest.class);
@ClassRule
public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer();
- private static DataBaseConnector dbc;
+ private static de.julielab.costosys.dbconnection.DataBaseConnector dbc;
@BeforeClass
public static void setup() throws SQLException, IOException {
@@ -37,7 +35,7 @@ public static void setup() throws SQLException, IOException {
@Test
public void testIterator() throws Exception {
try (CoStoSysConnection conn = dbc.reserveConnection()) {
- ThreadedColumnsToRetrieveIterator it = new ThreadedColumnsToRetrieveIterator(dbc, conn, Arrays.asList(new Object[]{"10922238"}), Constants.DEFAULT_DATA_TABLE_NAME, "medline_2016");
+ de.julielab.costosys.dbconnection.ThreadedColumnsToRetrieveIterator it = new de.julielab.costosys.dbconnection.ThreadedColumnsToRetrieveIterator(dbc, conn, Arrays.asList(new Object[]{"10922238"}), Constants.DEFAULT_DATA_TABLE_NAME, "medline_2016");
int numRetrieved = 0;
while (it.hasNext()) {
Object[] next = it.next();
@@ -50,7 +48,7 @@ public void testIterator() throws Exception {
@Test
public void testIteratorWithoutExternalConnection() throws Exception {
- ThreadedColumnsToRetrieveIterator it = new ThreadedColumnsToRetrieveIterator(dbc, null, Arrays.asList(new Object[]{"10922238"}), Constants.DEFAULT_DATA_TABLE_NAME, "medline_2016");
+ de.julielab.costosys.dbconnection.ThreadedColumnsToRetrieveIterator it = new ThreadedColumnsToRetrieveIterator(dbc, null, Arrays.asList(new Object[]{"10922238"}), Constants.DEFAULT_DATA_TABLE_NAME, "medline_2016");
int numRetrieved = 0;
while (it.hasNext()) {
Object[] next = it.next();