From c6ba1a60cc7ca4f770fff0f4cdb833d70b9e1ab4 Mon Sep 17 00:00:00 2001 From: Richard Zowalla <13417392+rzo1@users.noreply.github.com> Date: Thu, 19 Oct 2023 18:12:35 +0200 Subject: [PATCH] Resolves #226 "Transition from javax to jakarta" (#227) * Upgrade Spring 4 to Spring 5 (5.3.30) Upgrade Hibernate to 5.6 (5.6.15) Upgrade HSQLDB to 2.7.2 * Use correct Maven coordinates for Hibernate Remove C3P0 connection pool used in tests * Fix deprecations (language migration) * PostgreSQL Upgrade to 42.6.0 * Upgrade JGraphT to 1.5.2 * Move to log4j2 * Remove unused trove4j library * Remove unused jfreechart * Remove unused dependency towards apache ant * Remove dependency towards javax.mail * Remove deprecated mail logger, fix compile issues * Fork from ant bzip2 to avoid direct dependency to build tool * Compiler fixes + dependency upgrades * Fork of ant tools to avoid build tools as dependency * Fix compile issues from trove removal * Rename log4j config * Move JWPL to jakarta.* namespace targeting EE 9.1 (Hibernate 6.1.7, JPA 3.0) * Bumps version to 2.x to indicate a major change (javax -> jakarta) * adds the possibility to use MariaDB JDBC driver as alternative to proprietary MySQL connector switches to recommended collation format on modern mysql/mariadb DBMS environments: utf8mb4_bin sets mysql-connector-java driver to 'runtime' scope, as it should not be compiled against those classes * Remove Statistics* as these classes were used to connect to bender and compute some stuff. Won't work anyway anymore. * Remove unused file * Fix PerformanceIT * Log4J2 configs * Add "jwpl-*" prefix * Remove provided scope in dependency management Fix scope of log4j2 dependencies in parser module * Refactor Boolean.XY -> true/false * Move to commons-compress and remove forked ant code * Fix refactoring issue regarding hbm2ddl.auto * Introduce fastutil-core to avoid object-based arrays / maps in version dumpers --------- Co-authored-by: Martin Wiesner --- .github/workflows/maven.yml | 2 +- ...ormatik.tu-darmstadt.de_wikiapi_test__test | Bin 2782 -> 0 bytes de.tudarmstadt.ukp.wikipedia.api/pom.xml | 80 ++--- .../ukp/wikipedia/api/PerformanceIT.java | 5 +- .../src/it/resources/log4j.xml | 58 ---- .../src/it/resources/log4j2.xml | 39 +++ .../ukp/wikipedia/api/Category.java | 30 +- .../ukp/wikipedia/api/CategoryGraph.java | 26 +- .../wikipedia/api/DatabaseConfiguration.java | 16 +- .../tudarmstadt/ukp/wikipedia/api/Page.java | 33 +- .../ukp/wikipedia/api/PageIterator.java | 2 +- .../ukp/wikipedia/api/Wikipedia.java | 21 +- .../api/hibernate/WikiHibernateUtil.java | 17 +- .../api/util/GraphSerialization.java | 26 +- .../api/util/SerializableDirectedGraph.java | 8 +- .../api/util/GraphSerializationTest.java | 5 +- .../src/test/resources/log4j.properties | 72 ----- .../src/test/resources/log4j2.xml | 39 +++ .../pom.xml | 18 +- .../version/SingleDumpVersionJDKGeneric.java | 16 +- .../version/SingleDumpVersionOriginal.java | 25 +- .../resources/context/applicationContext.xml | 2 +- .../src/main/resources/log4j.xml | 61 ---- .../src/main/resources/log4j2.xml | 35 ++ .../{log4j.properties => log4j2.properties} | 0 de.tudarmstadt.ukp.wikipedia.mwdumper/pom.xml | 14 +- .../ukp/wikipedia/mwdumper/dumper/Tools.java | 10 +- de.tudarmstadt.ukp.wikipedia.parser/pom.xml | 10 +- .../parser/statistics/Statistics.java | 305 ------------------ .../parser/statistics/Statistics2.java | 216 ------------- .../src/test/resources/log4j.properties | 72 ----- .../src/test/resources/log4j2.xml | 31 ++ .../pom.xml | 18 +- .../archivers/Bzip2Archiver.java | 14 +- .../config/gui/panels/FilterPanel.java | 52 +-- .../main/resources/{log4j.xml => log4j2.xml} | 0 .../wikiapi_simple_20090119_stripped.script | 13 +- .../src/test/resources/log4j.properties | 72 ----- .../src/test/resources/log4j2.xml | 31 ++ .../pom.xml | 18 +- ...ey.java => DumpVersionFastUtilIntKey.java} | 35 +- .../dump/version/DumpVersionJDKGeneric.java | 16 +- .../resources/context/applicationContext.xml | 4 +- .../src/main/resources/log4j.xml | 61 ---- .../src/main/resources/log4j2.xml | 31 ++ de.tudarmstadt.ukp.wikipedia.tutorial/pom.xml | 4 +- de.tudarmstadt.ukp.wikipedia.util/pom.xml | 21 +- .../util/templates/parser/ParseUtils.java | 5 +- .../pom.xml | 10 +- .../context/applicationContextTemplate.xml | 2 +- .../wikimachine/debug/MailMemoryLogger.java | 152 --------- .../decompression/BZip2Decompressor.java | 6 +- .../src/test/resources/log4j.properties | 17 - .../src/test/resources/log4j2.xml | 31 ++ jwpl-deps/jwpl-swc-engine-shade/pom.xml | 147 +++++++++ jwpl-deps/pom.xml | 33 ++ pom.xml | 123 +++---- 57 files changed, 746 insertions(+), 1464 deletions(-) delete mode 100644 de.tudarmstadt.ukp.wikipedia.api/catGraphSer_bender.ukp.informatik.tu-darmstadt.de_wikiapi_test__test delete mode 100644 de.tudarmstadt.ukp.wikipedia.api/src/it/resources/log4j.xml create mode 100644 de.tudarmstadt.ukp.wikipedia.api/src/it/resources/log4j2.xml delete mode 100644 de.tudarmstadt.ukp.wikipedia.api/src/test/resources/log4j.properties create mode 100644 de.tudarmstadt.ukp.wikipedia.api/src/test/resources/log4j2.xml delete mode 100644 de.tudarmstadt.ukp.wikipedia.datamachine/src/main/resources/log4j.xml create mode 100644 de.tudarmstadt.ukp.wikipedia.datamachine/src/main/resources/log4j2.xml rename de.tudarmstadt.ukp.wikipedia.datamachine/src/test/resources/{log4j.properties => log4j2.properties} (100%) delete mode 100644 de.tudarmstadt.ukp.wikipedia.parser/src/main/java/de/tudarmstadt/ukp/wikipedia/parser/statistics/Statistics.java delete mode 100644 de.tudarmstadt.ukp.wikipedia.parser/src/main/java/de/tudarmstadt/ukp/wikipedia/parser/statistics/Statistics2.java delete mode 100644 de.tudarmstadt.ukp.wikipedia.parser/src/test/resources/log4j.properties create mode 100644 de.tudarmstadt.ukp.wikipedia.parser/src/test/resources/log4j2.xml rename de.tudarmstadt.ukp.wikipedia.revisionmachine/src/main/resources/{log4j.xml => log4j2.xml} (100%) delete mode 100644 de.tudarmstadt.ukp.wikipedia.revisionmachine/src/test/resources/log4j.properties create mode 100644 de.tudarmstadt.ukp.wikipedia.revisionmachine/src/test/resources/log4j2.xml rename de.tudarmstadt.ukp.wikipedia.timemachine/src/main/java/de/tudarmstadt/ukp/wikipedia/timemachine/dump/version/{DumpVersionTroveIntKey.java => DumpVersionFastUtilIntKey.java} (92%) delete mode 100644 de.tudarmstadt.ukp.wikipedia.timemachine/src/main/resources/log4j.xml create mode 100644 de.tudarmstadt.ukp.wikipedia.timemachine/src/main/resources/log4j2.xml delete mode 100644 de.tudarmstadt.ukp.wikipedia.wikimachine/src/main/java/de/tudarmstadt/ukp/wikipedia/wikimachine/debug/MailMemoryLogger.java delete mode 100644 de.tudarmstadt.ukp.wikipedia.wikimachine/src/test/resources/log4j.properties create mode 100644 de.tudarmstadt.ukp.wikipedia.wikimachine/src/test/resources/log4j2.xml create mode 100644 jwpl-deps/jwpl-swc-engine-shade/pom.xml create mode 100644 jwpl-deps/pom.xml diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 42f1a0664..e8c65293f 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -41,4 +41,4 @@ jobs: distribution: temurin java-version: ${{ matrix.java }} - name: Build with Maven - run: mvn -V clean test --no-transfer-progress \ No newline at end of file + run: mvn -V clean package --no-transfer-progress \ No newline at end of file diff --git a/de.tudarmstadt.ukp.wikipedia.api/catGraphSer_bender.ukp.informatik.tu-darmstadt.de_wikiapi_test__test b/de.tudarmstadt.ukp.wikipedia.api/catGraphSer_bender.ukp.informatik.tu-darmstadt.de_wikiapi_test__test deleted file mode 100644 index 134a1e4c7d79e0e4c9ac622f12899a36a26b181d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2782 zcma)7O>7%Q6dpUa6E~!(OG+A2C#22~P4I5)#7;vIA+!`CxvfORfrtYW@5G*D<6UOQ z&bC5Gkq{ukkxKMX!GQ}>PFxWpfDk=_;DAt1fCNHF95?~u!n|2~XC1GlSn_!Heee6; zn>X{G5B@}PhoKdNXuNJBTST1}xYooq>84#FTexoW zHs@S^F<&gq7o!Nd3}re=tD6pIxXho%4w32n`|U@G$R}fmQIyy~1K2dJJzA@jEX#H_ z(3#fzH4%u~CV9rF5T}ItiTJ%LWGr@*qmj1c!KDdZDxqHRyoPO#220zxfpsX3E(`)Y z3D`Q}zR>wLA-PK(}w+Nz?5hRK^(^;3xCUzVsiV?^^ zUtB0IEiGS^N0o$Zr5i^nZ!adURdi_*$_<9nox06K0@3Wn+;SnGU-Hgoywi8B##!B= z4OwO79W9}RW7S!iaCABl_{Qe8xK*Me365EX@K?}lXi#33VW_ls4NJ7PLyYIJQx!E| zJiPwWDf93@v7ZgpkIQA^I5(IHc?rdK)Tb3(1I-JR_c>pgrV8fX87U>4DdgFeC=ZXc!U)Bprs_ z17utw56ef$AOXmPLeBX}5|E5S#(bm?kST?n_mN&eW&#MjPs4KIh;b5(vWuWX&@sG9oymEQrj9crL=&YNgmL1EE@=tFrV`?M zW{;DNpG7Qj&Z56P@s^w!%`8Caao2_)c}bG8h^fxj0gqu(faT+aY~=~F<+4@dnAQlZ zqF?WR#lHDM7NL*U%GRzJ=k3#1j8n1UKC8|vmH;8>ikus5{}=LKOrF#WHB#HM#QZS? z^Um0ht6O#ZzBk1lR8zx0RCoXT^qsHq-5zN2m_u(7*~Dls2CeMMy|0SLxHQNP7=j!| zU>Sua4a>L->sjgkq)&=?DgX^Bsg^dFy;conj3`DxXF>+@`Z$y}8cNG}R+tI*x+giV zxWOd7*$^5ntr&qMr(}}e;0UEnC|ZxKksNz7P>~C;%t`@oYJ>{RD1oRHkh69UxLL{d zribDNyryIrB@j~?=m6zlS%9S=r6!LQEFhHuuURD(tlDEoXo337g)_A7jBGe#zB@w? nXXLvxa^Z}{?u> de.tudarmstadt.ukp.wikipedia de.tudarmstadt.ukp.wikipedia - 1.2.0-SNAPSHOT + 2.0.0-SNAPSHOT @@ -37,12 +37,12 @@ - org.hibernate + org.hibernate.orm hibernate-core - org.jgrapht - jgrapht-jdk1.5 + org.jgrapht + jgrapht-core de.fau.cs.osr.utils @@ -61,8 +61,19 @@ swc-parser-lazy - org.sweble.wikitext - swc-engine + de.tudarmstadt.ukp.wikipedia + jwpl-swc-engine-shade + ${project.version} + + + org.sweble.wikitext + swc-engine + + + org.sweble.wikitext + swc-parser-lazy + + com.neovisionaries @@ -77,16 +88,16 @@ org.slf4j slf4j-api - - org.slf4j - slf4j-log4j12 - test - - - log4j - log4j - test - + + org.apache.logging.log4j + log4j-slf4j-impl + test + + + org.apache.logging.log4j + log4j-core + test + @@ -102,43 +113,20 @@ - mysql - mysql-connector-java - test - - - org.hibernate - hibernate-c3p0 - test - - - com.mchange - c3p0 + org.mariadb.jdbc + mariadb-java-client test - - mysql-ukp - - bender.ukp.informatik.tu-darmstadt.de - wikiapi_en - student - student - - 4000 - 5 - 50 - - - - - mysql-hhn + + test-it-perf-mariadb-hhn - tulum-data.gecko.hs-heilbronn.de - wikiapi_de_jwpl_IT_test + + localhost + wikiapi_simple_20090119 student student diff --git a/de.tudarmstadt.ukp.wikipedia.api/src/it/java/de/tudarmstadt/ukp/wikipedia/api/PerformanceIT.java b/de.tudarmstadt.ukp.wikipedia.api/src/it/java/de/tudarmstadt/ukp/wikipedia/api/PerformanceIT.java index 196bb858c..42de6aaf3 100644 --- a/de.tudarmstadt.ukp.wikipedia.api/src/it/java/de/tudarmstadt/ukp/wikipedia/api/PerformanceIT.java +++ b/de.tudarmstadt.ukp.wikipedia.api/src/it/java/de/tudarmstadt/ukp/wikipedia/api/PerformanceIT.java @@ -59,7 +59,10 @@ private static DatabaseConfiguration obtainITDBConfiguration(Properties configur String user = configuration.getProperty("database.user"); String password = configuration.getProperty("database.password"); // String host, String database, String user, String password, Language language - return new DatabaseConfiguration(host, name, user, password, Language.english); + return new DatabaseConfiguration( + "org.mariadb.jdbc.Driver", + "jdbc:mariadb://" + host + "/" + name + "?serverTimezone=Europe/Berlin&autoReconnect=true&useSSL=false", + host, name, user, password, Language.english); } private static Properties loadConfiguration() { diff --git a/de.tudarmstadt.ukp.wikipedia.api/src/it/resources/log4j.xml b/de.tudarmstadt.ukp.wikipedia.api/src/it/resources/log4j.xml deleted file mode 100644 index 3d6306431..000000000 --- a/de.tudarmstadt.ukp.wikipedia.api/src/it/resources/log4j.xml +++ /dev/null @@ -1,58 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/de.tudarmstadt.ukp.wikipedia.api/src/it/resources/log4j2.xml b/de.tudarmstadt.ukp.wikipedia.api/src/it/resources/log4j2.xml new file mode 100644 index 000000000..c924a1c67 --- /dev/null +++ b/de.tudarmstadt.ukp.wikipedia.api/src/it/resources/log4j2.xml @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/Category.java b/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/Category.java index b180d2ca8..beeed33ad 100644 --- a/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/Category.java +++ b/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/Category.java @@ -17,19 +17,17 @@ */ package de.tudarmstadt.ukp.wikipedia.api; -import java.math.BigInteger; import java.util.HashSet; import java.util.Set; import org.hibernate.LockMode; import org.hibernate.Session; -import org.hibernate.type.LongType; -import org.hibernate.type.StringType; import de.tudarmstadt.ukp.wikipedia.api.exception.WikiApiException; import de.tudarmstadt.ukp.wikipedia.api.exception.WikiPageNotFoundException; import de.tudarmstadt.ukp.wikipedia.api.exception.WikiTitleParsingException; import de.tudarmstadt.ukp.wikipedia.api.hibernate.CategoryDAO; +import org.hibernate.type.StandardBasicTypes; public class Category implements WikiConstants { @@ -114,7 +112,7 @@ private void createCategory(Title title) throws WikiPageNotFoundException { query += Wikipedia.SQL_COLLATION; } returnValue = session.createNativeQuery(query) - .setParameter("name", name, StringType.INSTANCE) + .setParameter("name", name, StandardBasicTypes.STRING) .uniqueResult(); session.getTransaction().commit(); @@ -181,20 +179,20 @@ public Set getParents() { * @return The number of parents of this category. */ public int getNumberOfParents() { - BigInteger nrOfInlinks = new BigInteger("0"); + int nrOfInlinks = 0; long id = this.__getId(); Session session = this.wiki.__getHibernateSession(); session.beginTransaction(); Object returnValue = session.createNativeQuery("select count(inLinks) from category_inlinks where id = :id") - .setParameter("id", id, LongType.INSTANCE) + .setParameter("id", id, StandardBasicTypes.LONG) .uniqueResult(); session.getTransaction().commit(); if (returnValue != null) { - nrOfInlinks = (BigInteger) returnValue; + nrOfInlinks = ((Long) returnValue).intValue(); } - return nrOfInlinks.intValue(); + return nrOfInlinks; } /** @@ -231,20 +229,20 @@ public Set getChildren() { * @return The number of children of this category. */ public int getNumberOfChildren() { - BigInteger nrOfOutlinks = new BigInteger("0"); + int nrOfOutlinks = 0; long id = this.__getId(); Session session = this.wiki.__getHibernateSession(); session.beginTransaction(); Object returnValue = session.createNativeQuery("select count(outLinks) from category_outlinks where id = :id") - .setParameter("id", id, LongType.INSTANCE) + .setParameter("id", id, StandardBasicTypes.LONG) .uniqueResult(); session.getTransaction().commit(); if (returnValue != null) { - nrOfOutlinks = (BigInteger) returnValue; + nrOfOutlinks = ((Long) returnValue).intValue(); } - return nrOfOutlinks.intValue(); + return nrOfOutlinks; } /** @@ -304,20 +302,20 @@ public Set getArticleIds() { * @return The number of pages. */ public int getNumberOfPages() { - BigInteger nrOfPages = new BigInteger("0"); + int nrOfPages = 0; long id = this.__getId(); Session session = this.wiki.__getHibernateSession(); session.beginTransaction(); Object returnValue = session.createNativeQuery("select count(pages) from category_pages where id = :id") - .setParameter("id", id, LongType.INSTANCE) + .setParameter("id", id, StandardBasicTypes.LONG) .uniqueResult(); session.getTransaction().commit(); if (returnValue != null) { - nrOfPages = (BigInteger) returnValue; + nrOfPages = ((Long) returnValue).intValue(); } - return nrOfPages.intValue(); + return nrOfPages; } /** diff --git a/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/CategoryGraph.java b/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/CategoryGraph.java index 797eb997f..f13f82a8d 100644 --- a/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/CategoryGraph.java +++ b/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/CategoryGraph.java @@ -36,10 +36,9 @@ import java.util.Map; import java.util.Set; -import org.jgrapht.DirectedGraph; -import org.jgrapht.UndirectedGraph; -import org.jgrapht.alg.ConnectivityInspector; -import org.jgrapht.alg.DijkstraShortestPath; +import org.jgrapht.GraphPath; +import org.jgrapht.alg.connectivity.ConnectivityInspector; +import org.jgrapht.alg.shortestpath.DijkstraShortestPath; import org.jgrapht.graph.AsUndirectedGraph; import org.jgrapht.graph.DefaultDirectedGraph; import org.jgrapht.graph.DefaultEdge; @@ -51,6 +50,7 @@ import de.tudarmstadt.ukp.wikipedia.util.ApiUtilities; import de.tudarmstadt.ukp.wikipedia.util.CommonUtilities; import de.tudarmstadt.ukp.wikipedia.util.OS; +import org.jgrapht.graph.DefaultUndirectedGraph; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -69,9 +69,9 @@ public class CategoryGraph implements WikiConstants, Serializable { private Wikipedia wiki; // the category graph - private DirectedGraph graph; + private DefaultDirectedGraph graph; // the category graph - private UndirectedGraph undirectedGraph; + private AsUndirectedGraph undirectedGraph; // a map holding the degree distribution of the graph private Map degreeDistribution; @@ -180,11 +180,11 @@ protected CategoryGraph(Wikipedia pWiki, Set pPageIDs) throws WikiApiEx constructCategoryGraph(pWiki, pPageIDs, null); } - public CategoryGraph(Wikipedia pWiki, DirectedGraph pGraph) throws WikiApiException { + public CategoryGraph(Wikipedia pWiki, DefaultDirectedGraph pGraph) throws WikiApiException { constructCategoryGraph(pWiki, pGraph); } - private void constructCategoryGraph(Wikipedia pWiki, DirectedGraph pGraph) throws WikiApiException { + private void constructCategoryGraph(Wikipedia pWiki, DefaultDirectedGraph pGraph) throws WikiApiException { this.wiki = pWiki; this.graph = pGraph; this.numberOfNodes = this.graph.vertexSet().size(); @@ -661,12 +661,12 @@ public int getPathLengthInEdges(Category node1, Category node2) { } // get the path from root node to node 1 - List edgeList = DijkstraShortestPath.findPathBetween(undirectedGraph, node1.getPageId(), node2.getPageId()); + GraphPath edgeList = DijkstraShortestPath.findPathBetween(undirectedGraph, node1.getPageId(), node2.getPageId()); if (edgeList == null) { return -1; } else { - return edgeList.size(); + return edgeList.getLength(); } } // if the given nodes are not in the category graph, return -1 @@ -1141,7 +1141,7 @@ public CategoryGraph getLargestConnectedComponent() throws WikiApiException { ConnectivityInspector connectInspect = new ConnectivityInspector(graph); // if the graph is connected, simply return the whole graph - if (connectInspect.isGraphConnected()) { + if (connectInspect.isConnected()) { return this; } @@ -1612,11 +1612,11 @@ public String getGraphInfo() { /** * @return Returns the graph. */ - public DirectedGraph getGraph() { + public DefaultDirectedGraph getGraph() { return graph; } - public UndirectedGraph getUndirectedGraph() + public AsUndirectedGraph getUndirectedGraph() { return undirectedGraph; } diff --git a/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/DatabaseConfiguration.java b/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/DatabaseConfiguration.java index 50c72931c..988c2756c 100644 --- a/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/DatabaseConfiguration.java +++ b/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/DatabaseConfiguration.java @@ -29,8 +29,8 @@ public class DatabaseConfiguration { private String user; private String password; private Language language; - private String jdbcURL; - private String databaseDriver; + private String jdbcURL; + private String databaseDriver; public DatabaseConfiguration() {} @@ -39,8 +39,8 @@ public DatabaseConfiguration() {} * * @param host The hostname the machine the database is hosted on. * @param database The name of the database to connect to. - * @param user The username as part of the credentials used for for authentication. - * @param password The password as part of the credentials used for for authentication. + * @param user The username as part of the credentials used for authentication. + * @param password The password as part of the credentials used for authentication. * @param language The {@link Language} used for the underlying connection. */ public DatabaseConfiguration(String host, String database, String user, String password, Language language) { @@ -50,14 +50,14 @@ public DatabaseConfiguration(String host, String database, String user, String p } /** - * A constructor for an explicit DBMS specific configuration, e.g. for HSQLDB in tests contexts. + * A constructor for an explicit DBMS specific configuration. * * @param databaseDriver The fully qualified name of the JDBC driver. * @param jdbcURL A valid JDBC url used to open connections. * @param host The hostname the machine the database is hosted on. * @param database The name of the database to connect to. - * @param user The username as part of the credentials used for for authentication. - * @param password The password as part of the credentials used for for authentication. + * @param user The username as part of the credentials used for authentication. + * @param password The password as part of the credentials used for authentication. * @param language The {@link Language} used for the underlying connection. */ public DatabaseConfiguration(String databaseDriver, String jdbcURL, String host, String database, String user, @@ -77,7 +77,7 @@ public DatabaseConfiguration(String databaseDriver, String jdbcURL, String host, */ boolean supportsCollation() { if(databaseDriver!=null) { - return databaseDriver.contains("mysql"); + return databaseDriver.contains("mysql") || databaseDriver.contains("mariadb"); } else { return false; } diff --git a/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/Page.java b/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/Page.java index 023349662..2d3db269e 100644 --- a/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/Page.java +++ b/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/Page.java @@ -17,15 +17,12 @@ */ package de.tudarmstadt.ukp.wikipedia.api; -import java.math.BigInteger; import java.util.HashSet; import java.util.Set; import org.hibernate.LockOptions; import org.hibernate.Session; -import org.hibernate.type.IntegerType; -import org.hibernate.type.LongType; -import org.hibernate.type.StringType; +import org.hibernate.type.StandardBasicTypes; import org.sweble.wikitext.engine.PageId; import org.sweble.wikitext.engine.PageTitle; import org.sweble.wikitext.engine.WtEngineImpl; @@ -176,7 +173,7 @@ private void fetchByPageId(int pageID) Session session = this.wiki.__getHibernateSession(); session.beginTransaction(); hibernatePage = (de.tudarmstadt.ukp.wikipedia.api.hibernate.Page) session - .createQuery("from Page where pageId = :id").setParameter("id", pageID, IntegerType.INSTANCE).uniqueResult(); + .createQuery("from Page where pageId = :id").setParameter("id", pageID, StandardBasicTypes.INTEGER).uniqueResult(); session.getTransaction().commit(); if (hibernatePage == null) { @@ -204,7 +201,7 @@ private void fetchByTitle(Title pTitle, boolean useExactTitle) Integer pageId = (Integer) session .createNativeQuery( "select pml.pageID from PageMapLine as pml where pml.name = :pagetitle LIMIT 1") - .setParameter("pagetitle", searchString, StringType.INSTANCE).uniqueResult(); + .setParameter("pagetitle", searchString, StandardBasicTypes.STRING).uniqueResult(); session.getTransaction().commit(); if (pageId == null) { @@ -288,20 +285,20 @@ public Set getCategories() */ public int getNumberOfCategories() { - BigInteger nrOfCategories = new BigInteger("0"); + int nrOfCategories = 0; long id = __getId(); Session session = wiki.__getHibernateSession(); session.beginTransaction(); Object returnValue = session .createNativeQuery("select count(pages) from page_categories where id = :pageid") - .setParameter("pageid", id, LongType.INSTANCE).uniqueResult(); + .setParameter("pageid", id, StandardBasicTypes.LONG).uniqueResult(); session.getTransaction().commit(); if (returnValue != null) { - nrOfCategories = (BigInteger) returnValue; + nrOfCategories = ((Long) returnValue).intValue(); } - return nrOfCategories.intValue(); + return nrOfCategories; } /** @@ -343,20 +340,20 @@ public Set getInlinks() */ public int getNumberOfInlinks() { - BigInteger nrOfInlinks = new BigInteger("0"); + int nrOfInlinks = 0; long id = __getId(); Session session = wiki.__getHibernateSession(); session.beginTransaction(); Object returnValue = session .createNativeQuery("select count(pi.inLinks) from page_inlinks as pi where pi.id = :piid") - .setParameter("piid", id, LongType.INSTANCE).uniqueResult(); + .setParameter("piid", id, StandardBasicTypes.LONG).uniqueResult(); session.getTransaction().commit(); if (returnValue != null) { - nrOfInlinks = (BigInteger) returnValue; + nrOfInlinks = ((Long) returnValue).intValue(); } - return nrOfInlinks.intValue(); + return nrOfInlinks; } /** @@ -419,20 +416,20 @@ public Set getOutlinks() */ public int getNumberOfOutlinks() { - BigInteger nrOfOutlinks = new BigInteger("0"); + int nrOfOutlinks = 0; long id = __getId(); Session session = wiki.__getHibernateSession(); session.beginTransaction(); Object returnValue = session .createNativeQuery("select count(outLinks) from page_outlinks where id = :id") - .setParameter("id", id, LongType.INSTANCE).uniqueResult(); + .setParameter("id", id, StandardBasicTypes.LONG).uniqueResult(); session.getTransaction().commit(); if (returnValue != null) { - nrOfOutlinks = (BigInteger) returnValue; + nrOfOutlinks = ((Long) returnValue).intValue(); } - return nrOfOutlinks.intValue(); + return nrOfOutlinks; } /** diff --git a/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/PageIterator.java b/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/PageIterator.java index 6a496a72e..72fe18196 100644 --- a/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/PageIterator.java +++ b/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/PageIterator.java @@ -30,7 +30,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import javax.persistence.Query; +import jakarta.persistence.Query; /** * An iterator over {@link Page} objects. diff --git a/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/Wikipedia.java b/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/Wikipedia.java index 3cd592e5c..ea0e2cc7a 100644 --- a/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/Wikipedia.java +++ b/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/Wikipedia.java @@ -23,8 +23,6 @@ import org.hibernate.Session; import org.hibernate.query.Query; -import org.hibernate.type.IntegerType; -import org.hibernate.type.StringType; import de.tudarmstadt.ukp.wikipedia.api.exception.WikiApiException; import de.tudarmstadt.ukp.wikipedia.api.exception.WikiInitializationException; @@ -32,6 +30,7 @@ import de.tudarmstadt.ukp.wikipedia.api.exception.WikiTitleParsingException; import de.tudarmstadt.ukp.wikipedia.api.hibernate.WikiHibernateUtil; import de.tudarmstadt.ukp.wikipedia.util.distance.LevenshteinStringDistance; +import org.hibernate.type.StandardBasicTypes; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.sweble.wikitext.engine.config.WikiConfig; @@ -47,7 +46,7 @@ public class Wikipedia implements WikiConstants { private static final Logger logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); // Note well: The whitespace at the beginning of this constant is here on purpose. Do NOT remove it! - static final String SQL_COLLATION = " COLLATE utf8_bin"; + static final String SQL_COLLATION = " COLLATE utf8mb4_bin"; /*" COLLATE utf8_bin";*/ private final Language language; private final DatabaseConfiguration dbConfig; @@ -175,7 +174,7 @@ public Title getTitle(int pageId) throws WikiApiException { Session session = this.__getHibernateSession(); session.beginTransaction(); Object returnValue = session.createNativeQuery( - "select p.name from PageMapLine as p where p.pageId= :pId").setParameter("pId", pageId, IntegerType.INSTANCE).uniqueResult(); + "select p.name from PageMapLine as p where p.pageId= :pId").setParameter("pId", pageId, StandardBasicTypes.INTEGER).uniqueResult(); session.getTransaction().commit(); String title = (String)returnValue; @@ -197,7 +196,7 @@ public List getPageIds(String title) throws WikiApiException { Session session = this.__getHibernateSession(); session.beginTransaction(); Iterator results = session.createQuery( - "select p.pageID from PageMapLine as p where p.name = :pName").setParameter("pName", title, StringType.INSTANCE).list().iterator(); + "select p.pageID from PageMapLine as p where p.name = :pName").setParameter("pName", title, StandardBasicTypes.STRING).list().iterator(); session.getTransaction().commit(); @@ -226,7 +225,7 @@ public List getPageIdsCaseInsensitive(String title) throws WikiApiExcep Session session = this.__getHibernateSession(); session.beginTransaction(); Iterator results = session.createQuery( - "select p.pageID from PageMapLine as p where lower(p.name) = :pName").setParameter("pName", title, StringType.INSTANCE).list().iterator(); + "select p.pageID from PageMapLine as p where lower(p.name) = :pName").setParameter("pName", title, StandardBasicTypes.STRING).list().iterator(); session.getTransaction().commit(); @@ -366,7 +365,7 @@ public Iterable getDiscussionArchives(Page articlePage) throws WikiApiExce List discussionArchives = new LinkedList(); Query query = session.createQuery("SELECT pageID FROM PageMapLine where name like :name"); - query.setParameter("name", articleTitle+"/%", StringType.INSTANCE); + query.setParameter("name", articleTitle+"/%", StandardBasicTypes.STRING); Iterator results = query.list().iterator(); session.getTransaction().commit(); @@ -668,7 +667,7 @@ public boolean existsPage(String title) { query += SQL_COLLATION; } Object returnValue = session.createNativeQuery(query) - .setParameter("pName", encodedTitle, StringType.INSTANCE) + .setParameter("pName", encodedTitle, StandardBasicTypes.STRING) .uniqueResult(); session.getTransaction().commit(); @@ -696,7 +695,7 @@ public boolean existsPage(int pageID) { session.beginTransaction(); List returnList = session.createNativeQuery( "select p.id from PageMapLine as p where p.pageID = :pageId") - .setParameter("pageId", pageID, IntegerType.INSTANCE) + .setParameter("pageId", pageID, StandardBasicTypes.INTEGER) .list(); session.getTransaction().commit(); @@ -724,7 +723,7 @@ protected long __getPageHibernateId(int pageID) { session.beginTransaction(); Object retObjectPage = session.createQuery( "select page.id from Page as page where page.pageId = :pageId") - .setParameter("pageId", pageID, IntegerType.INSTANCE) + .setParameter("pageId", pageID, StandardBasicTypes.INTEGER) .uniqueResult(); session.getTransaction().commit(); if (retObjectPage != null) { @@ -758,7 +757,7 @@ protected long __getCategoryHibernateId(int pageID) { session.beginTransaction(); Object retObjectPage = session.createQuery( "select cat.id from Category as cat where cat.pageId = :pageId") - .setParameter("pageId", pageID, IntegerType.INSTANCE) + .setParameter("pageId", pageID, StandardBasicTypes.INTEGER) .uniqueResult(); session.getTransaction().commit(); if (retObjectPage != null) { diff --git a/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/hibernate/WikiHibernateUtil.java b/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/hibernate/WikiHibernateUtil.java index 3e3b49cf6..55b85933f 100644 --- a/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/hibernate/WikiHibernateUtil.java +++ b/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/hibernate/WikiHibernateUtil.java @@ -67,20 +67,23 @@ private static Properties getProperties(DatabaseConfiguration config) { Properties p = new Properties(); boolean useMySQL = false; + boolean useMariaDB = false; boolean useHSQL = false; // XXX other dialects might be interesting here as well... if(jdbcURL.toLowerCase().contains("mysql")) { useMySQL = true; - } - else if(jdbcURL.toLowerCase().contains("hsql")) { + } else if(jdbcURL.toLowerCase().contains("mariadb")) { + useMariaDB = true; + } else if(jdbcURL.toLowerCase().contains("hsql")) { useHSQL = true; } // SQL dialect if(useMySQL) { p.setProperty("hibernate.dialect","org.hibernate.dialect.MySQLDialect"); - } - if(useHSQL) { + } else if(useMariaDB) { + p.setProperty("hibernate.dialect","org.hibernate.dialect.MariaDBDialect"); + } else if(useHSQL) { p.setProperty("hibernate.dialect","org.hibernate.dialect.HSQLDialect"); } @@ -97,7 +100,7 @@ else if(jdbcURL.toLowerCase().contains("hsql")) { p.setProperty("hibernate.connection.password", password); // JDBC connection pool (use the built-in) --> - p.setProperty("hibernate.connection.pool_size","1"); + p.setProperty("hibernate.connection.pool_size","5"); // Enable Hibernate's automatic session context management p.setProperty("hibernate.current_session_context_class","thread"); @@ -109,9 +112,10 @@ else if(jdbcURL.toLowerCase().contains("hsql")) { p.setProperty("hibernate.show_sql","false"); // Do only update schema on changes - if(useMySQL) { + if(useMySQL || useMariaDB) { p.setProperty("hibernate.hbm2ddl.auto","validate"); } + if(useHSQL) { p.setProperty("hibernate.hbm2ddl.auto","none"); } @@ -129,7 +133,6 @@ else if(jdbcURL.toLowerCase().contains("hsql")) { p.setProperty("hibernate.c3p0.max_size","15"); p.setProperty("hibernate.c3p0.max_statements","100"); p.setProperty("hibernate.c3p0.timeout","1000"); - } return p; } diff --git a/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/util/GraphSerialization.java b/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/util/GraphSerialization.java index ddb75c263..8a42be2c7 100644 --- a/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/util/GraphSerialization.java +++ b/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/util/GraphSerialization.java @@ -26,11 +26,11 @@ import java.io.ObjectInputStream; import java.io.ObjectOutputStream; -import org.jgrapht.DirectedGraph; +import org.jgrapht.graph.DefaultDirectedGraph; import org.jgrapht.graph.DefaultEdge; /** - * Utility for serializing and deserializing {@link DirectedGraph} objects, that are
+ * Utility for serializing and deserializing {@link DefaultDirectedGraph} objects, that are
* wrapped into {@link SerializableDirectedGraph} objects. * */ @@ -43,13 +43,13 @@ private GraphSerialization() { } /** - * Serializes the given {@link DirectedGraph} object to the given location. + * Serializes the given {@link DefaultDirectedGraph} object to the given location. * * @param graph Must not be {@code null}. * @param location Must not be {@code null} and a valid file path. * @throws IOException Thrown if errors occurred on the IO level. */ - public static void saveGraph(DirectedGraph graph, String location) throws IOException { + public static void saveGraph(DefaultDirectedGraph graph, String location) throws IOException { File file = new File(location); file.createNewFile(); if (!file.canWrite()) { @@ -59,13 +59,13 @@ public static void saveGraph(DirectedGraph graph, String l } /** - * Serializes the given {@link DirectedGraph} object to the given location. + * Serializes the given {@link DefaultDirectedGraph} object to the given location. * * @param graph Must not be {@code null}. * @param file Must not be {@code null} and valid {@link File}. * @throws IOException Thrown if errors occurred on the IO level. */ - public static void saveGraph(DirectedGraph graph, File file) throws IOException { + public static void saveGraph(DefaultDirectedGraph graph, File file) throws IOException { SerializableDirectedGraph serialGraph = new SerializableDirectedGraph(graph); BufferedOutputStream fos; ObjectOutputStream out; @@ -78,16 +78,16 @@ public static void saveGraph(DirectedGraph graph, File fil /** * Deserializes a {@link SerializableDirectedGraph} object that is stored in the - * given location. This method returns the {@link DirectedGraph} object, that is wrapped + * given location. This method returns the {@link DefaultDirectedGraph} object, that is wrapped * in the {@link SerializableDirectedGraph}. * * @param location Must not be {@code null} and a valid file path. - * @return The {@link DirectedGraph} object, that is wrapped in the + * @return The {@link DefaultDirectedGraph} object, that is wrapped in the * {@link SerializableDirectedGraph}. * @throws IOException Thrown if errors occurred on the IO level. * @throws ClassNotFoundException Thrown if a class could not be find while deserialization. */ - public static DirectedGraph loadGraph(String location) + public static DefaultDirectedGraph loadGraph(String location) throws IOException, ClassNotFoundException { File file = new File(location); if (!file.canWrite()) { @@ -98,16 +98,16 @@ public static DirectedGraph loadGraph(String location) /** * Deserializes a {@link SerializableDirectedGraph} object that is stored in the - * given location. This method returns the {@link DirectedGraph} object, that is wrapped + * given location. This method returns the {@link DefaultDirectedGraph} object, that is wrapped * in the {@link SerializableDirectedGraph}. - * + * * @param file Must not be {@code null} and valid {@link File}. - * @return The {@link DirectedGraph} object, that is wrapped in the + * @return The {@link DefaultDirectedGraph} object, that is wrapped in the * {@link SerializableDirectedGraph}. * @throws IOException Thrown if errors occurred on the IO level. * @throws ClassNotFoundException Thrown if a class could not be find while deserialization. */ - public static DirectedGraph loadGraph(File file) throws IOException, ClassNotFoundException { + public static DefaultDirectedGraph loadGraph(File file) throws IOException, ClassNotFoundException { SerializableDirectedGraph serialGraph; BufferedInputStream fin; ObjectInputStream in; diff --git a/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/util/SerializableDirectedGraph.java b/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/util/SerializableDirectedGraph.java index 08691d4e4..2905bc685 100644 --- a/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/util/SerializableDirectedGraph.java +++ b/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/util/SerializableDirectedGraph.java @@ -19,7 +19,7 @@ import java.io.Serializable; -import org.jgrapht.DirectedGraph; +import org.jgrapht.graph.DefaultDirectedGraph; import org.jgrapht.graph.DefaultEdge; /** @@ -35,14 +35,14 @@ public final class SerializableDirectedGraph implements Serializable { */ private static final long serialVersionUID = -8298189410676038723L; - private DirectedGraph graph; + private DefaultDirectedGraph graph; /** * This Constructor is intended to be used before the serialization of the
* directed graph. * @param graph */ - public SerializableDirectedGraph(DirectedGraph graph){ + public SerializableDirectedGraph(DefaultDirectedGraph graph){ this.graph = graph; } @@ -50,7 +50,7 @@ public SerializableDirectedGraph(DirectedGraph graph){ * Returns the graph. * @return */ - public DirectedGraph getGraph(){ + public DefaultDirectedGraph getGraph(){ return graph; } } diff --git a/de.tudarmstadt.ukp.wikipedia.api/src/test/java/de/tudarmstadt/ukp/wikipedia/api/util/GraphSerializationTest.java b/de.tudarmstadt.ukp.wikipedia.api/src/test/java/de/tudarmstadt/ukp/wikipedia/api/util/GraphSerializationTest.java index d9d9fe1f9..bca6edc31 100644 --- a/de.tudarmstadt.ukp.wikipedia.api/src/test/java/de/tudarmstadt/ukp/wikipedia/api/util/GraphSerializationTest.java +++ b/de.tudarmstadt.ukp.wikipedia.api/src/test/java/de/tudarmstadt/ukp/wikipedia/api/util/GraphSerializationTest.java @@ -24,14 +24,13 @@ import java.io.File; import de.tudarmstadt.ukp.wikipedia.api.*; -import org.jgrapht.DirectedGraph; +import org.jgrapht.graph.DefaultDirectedGraph; import org.jgrapht.graph.DefaultEdge; import org.junit.After; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; -import de.tudarmstadt.ukp.wikipedia.api.WikiConstants.Language; import junit.framework.JUnit4TestAdapter; import junit.textui.TestRunner; @@ -106,7 +105,7 @@ public void testGraphSerialization() * graphs are identical. * @param graph */ - private void testGraph(DirectedGraph graph){ + private void testGraph(DefaultDirectedGraph graph){ //make sure all vertices are there for(int i=1;i<16;i++){ if(!graph.containsVertex(i)) { diff --git a/de.tudarmstadt.ukp.wikipedia.api/src/test/resources/log4j.properties b/de.tudarmstadt.ukp.wikipedia.api/src/test/resources/log4j.properties deleted file mode 100644 index f80f73b3a..000000000 --- a/de.tudarmstadt.ukp.wikipedia.api/src/test/resources/log4j.properties +++ /dev/null @@ -1,72 +0,0 @@ -# Licensed to the Technische Universität Darmstadt under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The Technische Universität Darmstadt -# licenses this file to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -### direct log messages to stdout ### -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.Target=System.out -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n - -### direct messages to file console.log ### -log4j.appender.file=org.apache.log4j.FileAppender -log4j.appender.file.File=console.log -log4j.appender.file.layout=org.apache.log4j.PatternLayout -log4j.appender.file.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n - -### set log levels - for more verbose logging change 'info' to 'debug' ### - -#log4j.rootLogger=info, file, stdout -log4j.rootLogger=info, stdout - -log4j.logger.de.tudarmstadt.ukp.wikipedia.api=warn -log4j.logger.de.tudarmstadt.ukp.wikipedia.util=warn - -log4j.logger.org.hibernate=warn -log4j.logger.org.hibernate.cfg=info -#log4j.logger.org.hibernate=info -#log4j.logger.org.hibernate=debug - -### log HQL query parser activity -#log4j.logger.org.hibernate.hql.ast.AST=debug - -### log just the SQL -#log4j.logger.org.hibernate.SQL=debug - -### log JDBC bind parameters ### -log4j.logger.org.hibernate.type=info -#log4j.logger.org.hibernate.type=debug - -### log schema export/update ### -log4j.logger.org.hibernate.tool.hbm2ddl=debug - -### log HQL parse trees -#log4j.logger.org.hibernate.hql=debug - -### log cache activity ### -#log4j.logger.org.hibernate.cache=debug - -### log transaction activity -#log4j.logger.org.hibernate.transaction=debug - -### log JDBC resource acquisition -#log4j.logger.org.hibernate.jdbc=debug - -### log only errors of internal JDBC connection provider -log4j.logger.org.hibernate.engine.jdbc.connections.internal=error - -### enable the following line if you want to track down connection ### -### leakages when using DriverManagerConnectionProvider ### -#log4j.logger.org.hibernate.connection.DriverManagerConnectionProvider=trace diff --git a/de.tudarmstadt.ukp.wikipedia.api/src/test/resources/log4j2.xml b/de.tudarmstadt.ukp.wikipedia.api/src/test/resources/log4j2.xml new file mode 100644 index 000000000..c924a1c67 --- /dev/null +++ b/de.tudarmstadt.ukp.wikipedia.api/src/test/resources/log4j2.xml @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/de.tudarmstadt.ukp.wikipedia.datamachine/pom.xml b/de.tudarmstadt.ukp.wikipedia.datamachine/pom.xml index e75119dcf..b96a05f12 100644 --- a/de.tudarmstadt.ukp.wikipedia.datamachine/pom.xml +++ b/de.tudarmstadt.ukp.wikipedia.datamachine/pom.xml @@ -20,7 +20,7 @@ de.tudarmstadt.ukp.wikipedia de.tudarmstadt.ukp.wikipedia - 1.2.0-SNAPSHOT + 2.0.0-SNAPSHOT 4.0.0 de.tudarmstadt.ukp.wikipedia.datamachine @@ -30,23 +30,23 @@ de.tudarmstadt.ukp.wikipedia de.tudarmstadt.ukp.wikipedia.wikimachine - - net.sf.trove4j - trove4j - de.tudarmstadt.ukp.wikipedia de.tudarmstadt.ukp.wikipedia.mwdumper + + it.unimi.dsi + fastutil-core + - org.slf4j - slf4j-log4j12 + org.apache.logging.log4j + log4j-slf4j-impl provided - log4j - log4j + org.apache.logging.log4j + log4j-core provided diff --git a/de.tudarmstadt.ukp.wikipedia.datamachine/src/main/java/de/tudarmstadt/ukp/wikipedia/datamachine/dump/version/SingleDumpVersionJDKGeneric.java b/de.tudarmstadt.ukp.wikipedia.datamachine/src/main/java/de/tudarmstadt/ukp/wikipedia/datamachine/dump/version/SingleDumpVersionJDKGeneric.java index 1ae876868..a6fd2b5ee 100644 --- a/de.tudarmstadt.ukp.wikipedia.datamachine/src/main/java/de/tudarmstadt/ukp/wikipedia/datamachine/dump/version/SingleDumpVersionJDKGeneric.java +++ b/de.tudarmstadt.ukp.wikipedia.datamachine/src/main/java/de/tudarmstadt/ukp/wikipedia/datamachine/dump/version/SingleDumpVersionJDKGeneric.java @@ -20,7 +20,9 @@ import java.io.IOException; import java.sql.Timestamp; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; +import java.util.Set; import de.tudarmstadt.ukp.wikipedia.wikimachine.dump.sql.CategorylinksParser; import de.tudarmstadt.ukp.wikipedia.wikimachine.dump.sql.PagelinksParser; @@ -31,8 +33,6 @@ import de.tudarmstadt.ukp.wikipedia.wikimachine.hashing.IStringHashCode; import de.tudarmstadt.ukp.wikipedia.wikimachine.util.Redirects; import de.tudarmstadt.ukp.wikipedia.wikimachine.util.TxtFileWriter; -import gnu.trove.map.hash.TIntIntHashMap; -import gnu.trove.set.hash.TIntHashSet; public class SingleDumpVersionJDKGeneric extends AbstractDumpVersion { @@ -44,12 +44,12 @@ public class SingleDumpVersionJDKGeneric pPageIdNameMap; - private TIntHashSet cPageIdNameMap; + private Set cPageIdNameMap; private Map pNamePageIdMap; private Map cNamePageIdMap; private Map rPageIdNameMap; - private TIntHashSet disambiguations; - private TIntIntHashMap textIdPageIdMap; + private Set disambiguations; + private Map textIdPageIdMap; IStringHashCode hashAlgorithm; @@ -107,12 +107,12 @@ public void freeAfterTextParsing() { @Override public void initialize(Timestamp timestamp) { pPageIdNameMap = new HashMap(1_000_000); - cPageIdNameMap = new TIntHashSet(1_000_000); + cPageIdNameMap = new HashSet<>(1_000_000); pNamePageIdMap = new HashMap(1_000_000); cNamePageIdMap = new HashMap(1_000_000); rPageIdNameMap = new HashMap(1_000_000); - disambiguations = new TIntHashSet(1_000_000); - textIdPageIdMap = new TIntIntHashMap(1_000_000); + disambiguations = new HashSet<>(1_000_000); + textIdPageIdMap = new HashMap(1_000_000); } @SuppressWarnings("unchecked") diff --git a/de.tudarmstadt.ukp.wikipedia.datamachine/src/main/java/de/tudarmstadt/ukp/wikipedia/datamachine/dump/version/SingleDumpVersionOriginal.java b/de.tudarmstadt.ukp.wikipedia.datamachine/src/main/java/de/tudarmstadt/ukp/wikipedia/datamachine/dump/version/SingleDumpVersionOriginal.java index 7bf40c54d..be9b24994 100644 --- a/de.tudarmstadt.ukp.wikipedia.datamachine/src/main/java/de/tudarmstadt/ukp/wikipedia/datamachine/dump/version/SingleDumpVersionOriginal.java +++ b/de.tudarmstadt.ukp.wikipedia.datamachine/src/main/java/de/tudarmstadt/ukp/wikipedia/datamachine/dump/version/SingleDumpVersionOriginal.java @@ -36,6 +36,9 @@ import de.tudarmstadt.ukp.wikipedia.wikimachine.dump.xml.TextParser; import de.tudarmstadt.ukp.wikipedia.wikimachine.util.Redirects; import de.tudarmstadt.ukp.wikipedia.wikimachine.util.TxtFileWriter; +import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap; +import it.unimi.dsi.fastutil.ints.IntArraySet; +import it.unimi.dsi.fastutil.ints.IntSet; /** * Transforms a database from mediawiki format to JWPL format.
@@ -71,9 +74,9 @@ public class SingleDumpVersionOriginal implements IDumpVersion { // their page id's. private Map rPageIdNameMap;// maps page id's of redirects // to their names. - private Set disambiguations; // caches the page id's of + private IntSet disambiguations; // caches the page id's of // disambiguation pages. - private Map textIdPageIdMap;// maps text id's to the page + private Int2IntOpenHashMap textIdPageIdMap;// maps text id's to the page // id's. @@ -221,13 +224,13 @@ public void initTextParsing() throws IOException { @Override public void initialize(Timestamp timestamp) { - this.pPageIdNameMap = new HashMap(); - this.cPageIdNameMap = new HashMap(); - this.pNamePageIdMap = new HashMap(); - this.cNamePageIdMap = new HashMap(); - this.rPageIdNameMap = new HashMap(); - this.disambiguations = new HashSet(); - this.textIdPageIdMap = new HashMap(); + this.pPageIdNameMap = new HashMap<>(); + this.cPageIdNameMap = new HashMap<>(); + this.pNamePageIdMap = new HashMap<>(); + this.cNamePageIdMap = new HashMap<>(); + this.rPageIdNameMap = new HashMap<>(); + this.disambiguations = new IntArraySet(); + this.textIdPageIdMap = new Int2IntOpenHashMap(); } @@ -240,8 +243,8 @@ public void processCategoryLinksRow(CategorylinksParser clParser) cl_from = clParser.getClFrom(); cl_to = clParser.getClTo(); - if (!cNamePageIdMap.containsKey(cl_to)) {// discard links with non - // registred targets + if (!cNamePageIdMap.containsKey(cl_to)) { + // discard links with non-registered targets return; } // if the link source is a page then write the link in diff --git a/de.tudarmstadt.ukp.wikipedia.datamachine/src/main/resources/context/applicationContext.xml b/de.tudarmstadt.ukp.wikipedia.datamachine/src/main/resources/context/applicationContext.xml index 22154f118..c5a4aad6f 100644 --- a/de.tudarmstadt.ukp.wikipedia.datamachine/src/main/resources/context/applicationContext.xml +++ b/de.tudarmstadt.ukp.wikipedia.datamachine/src/main/resources/context/applicationContext.xml @@ -67,7 +67,7 @@ de.tudarmstadt.ukp.wikipedia.timemachine.dump.version.DumpVersionJDKIntKeyFactory (bean id="dumpVersionFactory") de.tudarmstadt.ukp.wikipedia.timemachine.dump.version.DumpVersionJDKLongKeyFactory (bean id="dumpVersionFactory") de.tudarmstadt.ukp.wikipedia.timemachine.dump.version.DumpVersionJDKStringKeyFactory (bean id="dumpVersionFactory") - de.tudarmstadt.ukp.wikipedia.timemachine.dump.version.DumpVersionTroveIntKey (bean id="dumpVersion" without factory) [default] + de.tudarmstadt.ukp.wikipedia.timemachine.dump.version.DumpVersionFastUtilIntKey (bean id="dumpVersion" without factory) [default] de.tudarmstadt.ukp.wikipedia.timemachine.dump.version.OriginalDumpVersion (bean id="dumpVersion" without factory) --> diff --git a/de.tudarmstadt.ukp.wikipedia.datamachine/src/main/resources/log4j.xml b/de.tudarmstadt.ukp.wikipedia.datamachine/src/main/resources/log4j.xml deleted file mode 100644 index f18984d65..000000000 --- a/de.tudarmstadt.ukp.wikipedia.datamachine/src/main/resources/log4j.xml +++ /dev/null @@ -1,61 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/de.tudarmstadt.ukp.wikipedia.datamachine/src/main/resources/log4j2.xml b/de.tudarmstadt.ukp.wikipedia.datamachine/src/main/resources/log4j2.xml new file mode 100644 index 000000000..1fe225c32 --- /dev/null +++ b/de.tudarmstadt.ukp.wikipedia.datamachine/src/main/resources/log4j2.xml @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/de.tudarmstadt.ukp.wikipedia.datamachine/src/test/resources/log4j.properties b/de.tudarmstadt.ukp.wikipedia.datamachine/src/test/resources/log4j2.properties similarity index 100% rename from de.tudarmstadt.ukp.wikipedia.datamachine/src/test/resources/log4j.properties rename to de.tudarmstadt.ukp.wikipedia.datamachine/src/test/resources/log4j2.properties diff --git a/de.tudarmstadt.ukp.wikipedia.mwdumper/pom.xml b/de.tudarmstadt.ukp.wikipedia.mwdumper/pom.xml index e0568c825..9b62019e8 100644 --- a/de.tudarmstadt.ukp.wikipedia.mwdumper/pom.xml +++ b/de.tudarmstadt.ukp.wikipedia.mwdumper/pom.xml @@ -20,7 +20,7 @@ de.tudarmstadt.ukp.wikipedia de.tudarmstadt.ukp.wikipedia - 1.2.0-SNAPSHOT + 2.0.0-SNAPSHOT de.tudarmstadt.ukp.wikipedia.mwdumper MediaWiki Dumper @@ -34,16 +34,14 @@ + + org.apache.commons + commons-compress + junit junit test - - org.apache.ant - ant - jar - compile - - + \ No newline at end of file diff --git a/de.tudarmstadt.ukp.wikipedia.mwdumper/src/main/java/de/tudarmstadt/ukp/wikipedia/mwdumper/dumper/Tools.java b/de.tudarmstadt.ukp.wikipedia.mwdumper/src/main/java/de/tudarmstadt/ukp/wikipedia/mwdumper/dumper/Tools.java index 3e5c282ab..732b40040 100644 --- a/de.tudarmstadt.ukp.wikipedia.mwdumper/src/main/java/de/tudarmstadt/ukp/wikipedia/mwdumper/dumper/Tools.java +++ b/de.tudarmstadt.ukp.wikipedia.mwdumper/src/main/java/de/tudarmstadt/ukp/wikipedia/mwdumper/dumper/Tools.java @@ -17,6 +17,9 @@ */ package de.tudarmstadt.ukp.wikipedia.mwdumper.dumper; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; + import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.File; @@ -28,9 +31,6 @@ import java.io.OutputStream; import java.util.zip.GZIPInputStream; -import org.apache.tools.bzip2.CBZip2InputStream; -import org.apache.tools.bzip2.CBZip2OutputStream; - public class Tools { static final int IN_BUF_SZ = 1024 * 1024; private static final int OUT_BUF_SZ = 1024 * 1024; @@ -61,7 +61,7 @@ static InputStream openBZip2Stream(InputStream infile) throws IOException { if (first != 'B' || second != 'Z') { throw new IOException("Didn't find BZ file signature in .bz2 file"); } - return new CBZip2InputStream(infile); + return new BZip2CompressorInputStream(infile); } static OutputStream openStandardOutput() { @@ -73,7 +73,7 @@ static OutputStream createBZip2File(String param) throws IOException, FileNotFou // bzip2 expects a two-byte 'BZ' signature header outfile.write('B'); outfile.write('Z'); - return new CBZip2OutputStream(outfile); + return new BZip2CompressorOutputStream(outfile); } static OutputStream createOutputFile(String param) throws IOException, FileNotFoundException { diff --git a/de.tudarmstadt.ukp.wikipedia.parser/pom.xml b/de.tudarmstadt.ukp.wikipedia.parser/pom.xml index a95eadae5..96f3050a9 100644 --- a/de.tudarmstadt.ukp.wikipedia.parser/pom.xml +++ b/de.tudarmstadt.ukp.wikipedia.parser/pom.xml @@ -21,7 +21,7 @@ de.tudarmstadt.ukp.wikipedia de.tudarmstadt.ukp.wikipedia - 1.2.0-SNAPSHOT + 2.0.0-SNAPSHOT de.tudarmstadt.ukp.wikipedia.parser MediaWiki Parser @@ -44,13 +44,13 @@ Please use the SWEBLE parser (http://sweble.org/) for parsing MediaWiki markup. slf4j-api - org.slf4j - slf4j-log4j12 + org.apache.logging.log4j + log4j-slf4j-impl test - log4j - log4j + org.apache.logging.log4j + log4j-core test diff --git a/de.tudarmstadt.ukp.wikipedia.parser/src/main/java/de/tudarmstadt/ukp/wikipedia/parser/statistics/Statistics.java b/de.tudarmstadt.ukp.wikipedia.parser/src/main/java/de/tudarmstadt/ukp/wikipedia/parser/statistics/Statistics.java deleted file mode 100644 index 55b632add..000000000 --- a/de.tudarmstadt.ukp.wikipedia.parser/src/main/java/de/tudarmstadt/ukp/wikipedia/parser/statistics/Statistics.java +++ /dev/null @@ -1,305 +0,0 @@ -/* - * Licensed to the Technische Universität Darmstadt under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The Technische Universität Darmstadt - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package de.tudarmstadt.ukp.wikipedia.parser.statistics; - -import java.io.BufferedWriter; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Date; -import java.util.Iterator; -import java.util.List; - -import de.tudarmstadt.ukp.wikipedia.api.DatabaseConfiguration; -import de.tudarmstadt.ukp.wikipedia.api.Page; -import de.tudarmstadt.ukp.wikipedia.api.WikiConstants.Language; -import de.tudarmstadt.ukp.wikipedia.api.Wikipedia; -import de.tudarmstadt.ukp.wikipedia.parser.ParsedPage; -import de.tudarmstadt.ukp.wikipedia.parser.SectionContainer; -import de.tudarmstadt.ukp.wikipedia.parser.Table; -import de.tudarmstadt.ukp.wikipedia.parser.TableElement; -import de.tudarmstadt.ukp.wikipedia.parser.Template; -import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParser; -import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParserFactory; -import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.ShowTemplateNamesAndParameters; - -/** - * This creates a Detailed Statistic file for Tables and Templates. - * - */ -public class Statistics { - - // Constants - public static final String path = "./data/parsedpage/statistics/"; - - // Variables - static long nrOfPages; - static int nrOfTables; - static int nrOfTemplates; - static int nrOfAnalyzedPages; - static List templateNrOfOccurence; - static List templateNameOfFirstOccurence; - static List templateNames; - static List pagesWithTableSections; - - // Debug - static final int skipPages = 0; - static final long offsetTime = 0; //1000 Sec/65536 Pages - static final boolean debug = false; - - public static void main( String[] argv) throws Exception{ - // configure the database connection parameters - DatabaseConfiguration dbConfig = new DatabaseConfiguration(); - dbConfig.setDatabase("wikiapi_en"); - dbConfig.setHost("bender.ukp.informatik.tu-darmstadt.de"); - dbConfig.setUser("student"); - dbConfig.setPassword("student"); - dbConfig.setLanguage(Language.english); - - Wikipedia wiki = new Wikipedia(dbConfig); - - MediaWikiParserFactory pf = new MediaWikiParserFactory( ); - pf.setTemplateParserClass( ShowTemplateNamesAndParameters.class ); - pf.setShowImageText( true ); - pf.setShowMathTagContent( true ); - pf.setDeleteTags( false ); - pf.getImageIdentifers().add("IMAGE"); - pf.setCalculateSrcSpans( false ); - - MediaWikiParser parser = pf.createParser(); - - Iterator pageIt = wiki.getArticles().iterator(); - - nrOfPages = wiki.getMetaData().getNumberOfPages(); - nrOfTables = 0; - nrOfTemplates = 0; - templateNames = new ArrayList(); - templateNameOfFirstOccurence = new ArrayList(); - templateNrOfOccurence = new ArrayList(); - pagesWithTableSections = new ArrayList(); - - long startTime = new Date().getTime(); - - nrOfAnalyzedPages = 0; - - System.out.println("ANALYSING ..."); - while (pageIt.hasNext()) { - Page currentPage = pageIt.next(); - nrOfAnalyzedPages++; - - //For Debugging purposes... - if( nrOfAnalyzedPages < skipPages+1 ){ - System.out.println("Skipped: "+ currentPage.getPageId()); - continue; - } - - //Screen Info - if( nrOfAnalyzedPages % 1024 == 0){ - long aktualTime = new Date().getTime(); - long runnedTime = aktualTime - startTime + offsetTime; - long totalTime = (runnedTime * nrOfPages) / nrOfAnalyzedPages; - - System.out.println( - percentString(nrOfAnalyzedPages,nrOfPages)+ - " -> "+nrOfAnalyzedPages+" of "+nrOfPages+" pages in "+ runnedTime/1000+"sec"+ - " -> "+(totalTime-runnedTime)/60000+"min left" - ); - } - - //Parsing - String name = currentPage.getTitle().getPlainTitle(); - String src = currentPage.getText(); - - if(debug) System.out.println( " "+currentPage.getPageId()+" "+name ); - - ParsedPage pp = parser.parse(src); - if (pp==null) { - // this is an Error, wich occures when src="" - continue; - } - - pp.setName(name); - - - //Template Analysis - for( Template t: pp.getTemplates()){ - nrOfTemplates++; - String templateName = t.getName().toLowerCase(); - if( templateName.startsWith("vorlage:") )templateName = templateName.substring(8); - else if( templateName.startsWith("template:") )templateName = templateName.substring(9); - - int pos = templateNames.indexOf( templateName ); - if( pos != -1 ){ - templateNrOfOccurence.set( pos, templateNrOfOccurence.get( pos )+1 ); - } - else{ - templateNrOfOccurence.add(1); - templateNames.add( templateName ); - templateNameOfFirstOccurence.add( pp.getName() ); - List temp = new ArrayList(); - temp.add( pp.getName() ); - } - } - - //Table Analysis - if( pp.nrOfTables()!=0 ) nrOfTables++; - boolean b = true; - for( Table t: pp.getTables() ){ - if( b )for( int i=0; i 1 || te.getSection(0).getClass()==SectionContainer.class ){ - pagesWithTableSections.add( pp.getName() ); - b = false; - break; - } - } - } - - // if( nrOfAnalyzedPages == 1000 ) break; - } - System.out.println("Finished."); - - sortTemplates(); - writeFiles("statistics"); - - restructureTemplateNames(); - sortTemplates(); - writeTemplates("statistics.restructured"); - - System.out.println("check the Results ;-)\nnow..."); - } - - private static void sortTemplates(){ - //sort templates - System.out.println("Sort Template List"); - List sTemplateNames = new ArrayList(); - List sOcc = new ArrayList(); - List sTemplateNameFirstOcc = new ArrayList(); - - for( int i=0; i "+ percentString(nrOfTables,nrOfAnalyzedPages)+" @Pages\n"+ - "\n"+ - "Found "+sections+ " Sections in Tables\n"+ - "-> "+percentString(sections,nrOfTables)+" @Tables\n"+ - "-> "+percentString(sections,nrOfAnalyzedPages)+" @Pages\n"+ - "\n"); - - bw.write("-=Pages with Tables and Sections---------------------------------------------------\n"); - for(String s: pagesWithTableSections ) bw.write(s+"\n"); - - bw.close(); - - System.out.println( " --> OK" ); - } - - private static void restructureTemplateNames(){ - System.out.println( "restructure Template Names" ); - - List newTemplateNames = new ArrayList(); - List newTemplateNrOfOccurence = new ArrayList(); - List newTemplateNameOfFirstOccurence = new ArrayList(); - - for( int i=0; i 0) { - temp = (a*10000)/nr; - } - else { - temp = 0; - } - return temp/100+"."+(temp/10)%10+""+temp%10+"%"; - } -} diff --git a/de.tudarmstadt.ukp.wikipedia.parser/src/main/java/de/tudarmstadt/ukp/wikipedia/parser/statistics/Statistics2.java b/de.tudarmstadt.ukp.wikipedia.parser/src/main/java/de/tudarmstadt/ukp/wikipedia/parser/statistics/Statistics2.java deleted file mode 100644 index a8444e76e..000000000 --- a/de.tudarmstadt.ukp.wikipedia.parser/src/main/java/de/tudarmstadt/ukp/wikipedia/parser/statistics/Statistics2.java +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Licensed to the Technische Universität Darmstadt under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The Technische Universität Darmstadt - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package de.tudarmstadt.ukp.wikipedia.parser.statistics; - -import java.util.Date; -import java.util.Iterator; - -import de.tudarmstadt.ukp.wikipedia.api.DatabaseConfiguration; -import de.tudarmstadt.ukp.wikipedia.api.Page; -import de.tudarmstadt.ukp.wikipedia.api.WikiConstants.Language; -import de.tudarmstadt.ukp.wikipedia.api.Wikipedia; -import de.tudarmstadt.ukp.wikipedia.parser.Content.FormatType; -import de.tudarmstadt.ukp.wikipedia.parser.ParsedPage; -import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParser; -import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParserFactory; -import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.ShowTemplateNamesAndParameters; - -/** - * Creates a little Statistic about occurence of MediaWiki Elements...
- *
- * Results for 15.05.2006 Database:
- * SUBS: 279896 74.19%
- * NL: 255511 67.72%
- * DL: 1679 0.44%
- * TABLES: 64967 17.22%
- * TEMPLATES: 215022 56.99%
- * BOLD: 364484 96.61%
- * ITALIC: 231877 61.46%
- * MATH: 6499 1.72%
- * TAGS: 74236 19.67%
- * NOWIKI: 3058 0.81%
- * - */ -public class Statistics2 { - - // Variables - static int nrOfPages; - - static int nrOfPagesWithNl; - static int nrOfPagesWithDl; - static int nrOfPagesWithBold; - static int nrOfPagesWithItalic; - static int nrOfPagesWithMath; - static int nrOfPagesWithTag; - static int nrOfPagesWithNoWiki; - static int nrOfPagesWithTables; - static int nrOfPagesWithSubSections; - static int nrOfPagesWithTemplates; - - static int len_longestPage; - static long len_allPages; - - static int nrOfAnalyzedPages; - - // Debug - static final int skipPages = 0; - static final long offsetTime = 0; //1000 Sec/65536 Pages - static final boolean debug = false; - static final boolean savFiles = false; - - public static void main( String[] argv) throws Exception{ - // configure the database connection parameters - DatabaseConfiguration dbConfig = new DatabaseConfiguration(); - dbConfig.setDatabase("wikiapi_de"); - dbConfig.setHost("bender.ukp.informatik.tu-darmstadt.de"); - dbConfig.setUser("student"); - dbConfig.setPassword("student"); - dbConfig.setLanguage(Language.german); - - Wikipedia wiki = new Wikipedia(dbConfig); - - MediaWikiParserFactory pf = new MediaWikiParserFactory( ); - - pf.setTemplateParserClass( ShowTemplateNamesAndParameters.class ); - pf.setShowImageText( true ); - pf.setShowMathTagContent( true ); - pf.setDeleteTags( false ); - pf.getImageIdentifers().add("IMAGE"); - pf.setCalculateSrcSpans( false ); - - MediaWikiParser parser = pf.createParser(); - - System.out.println( parser.configurationInfo() ); - - Iterator pageIt = wiki.getArticles().iterator(); - - nrOfPages = 0; - nrOfPagesWithNl = 0; - nrOfPagesWithDl = 0; - nrOfPagesWithBold = 0; - nrOfPagesWithItalic = 0; - nrOfPagesWithMath = 0; - nrOfPagesWithTag = 0; - nrOfPagesWithTables = 0; - nrOfPagesWithSubSections = 0; - nrOfPagesWithTemplates = 0; - nrOfPagesWithNoWiki = 0; - len_allPages = 0; - len_longestPage = 0; - - nrOfAnalyzedPages = 0; - - long startTime = new Date().getTime(); - - System.out.println("START OF ANALYSATION"); - while (pageIt.hasNext()) { - Page currentPage = pageIt.next(); - nrOfAnalyzedPages++; - - //For Debugging purposes... - if( nrOfAnalyzedPages < skipPages+1 ){ - System.out.println("Skipped: "+ currentPage.getPageId()); - continue; - } - - //Parsing - String name = currentPage.getTitle().getPlainTitle(); - String src = currentPage.getText(); - - if(debug) System.out.println( " "+currentPage.getPageId()+" "+name ); - - ParsedPage pp = parser.parse(src); - - if(pp==null){ - // this is an Error, wich occures when src="" - continue; - } - - pp.setName(name); - - //ANALYSIS - - int len_page = src.length(); - if( len_page > len_longestPage ) len_longestPage = len_page; - len_allPages += len_page; - - if( pp.nrOfDefinitionLists() != 0 )nrOfPagesWithDl++; - if( pp.nrOfNestedLists() != 0 )nrOfPagesWithNl++; - if( pp.nrOfTables() != 0 ) nrOfPagesWithTables++; - if( pp.getTemplates().size() != 0 ) nrOfPagesWithTemplates++; - if( pp.getSections().size()>1 )nrOfPagesWithSubSections++; - - for( FormatType ft: pp.getFormats() ){ - if( ft == FormatType.BOLD ) nrOfPagesWithBold++; - if( ft == FormatType.ITALIC ) nrOfPagesWithItalic++; - if( ft == FormatType.NOWIKI ) nrOfPagesWithNoWiki++; - if( ft == FormatType.MATH ) nrOfPagesWithMath++; - if( ft == FormatType.TAG ) nrOfPagesWithTag++; - } - - //Screen Info - if( nrOfAnalyzedPages % 1024 == 0 ){ - long aktualTime = new Date().getTime(); - long runnedTime = aktualTime - startTime + offsetTime; - long totalTime = (runnedTime * nrOfPages) / nrOfAnalyzedPages; - - System.out.println( - percentString(nrOfAnalyzedPages,nrOfPages)+ - " -> "+nrOfAnalyzedPages+" of "+nrOfPages+" pages in "+ runnedTime/1000+"sec"+ - " -> "+(totalTime-runnedTime)/60000+"min left" - ); - - screenInfo(); - - System.out.println(); - } - - // if( nrOfAnalyzedPages == 1000 ) break; - } - System.out.println("END OF ANALYSATION"); - screenInfo(); - - } - - private static String percentString( long a, long nr){ - long temp = (a*10000)/nr; - return temp/100+"."+(temp/10)%10+""+temp%10+"%"; - } - - private static String pi( String about, int what ){ - return " "+about+": "+what+" "+percentString(what,nrOfAnalyzedPages)+"\n"; - } - - private static void screenInfo(){ - System.out.print( - pi("SUBS",nrOfPagesWithSubSections) + - pi("NL", nrOfPagesWithNl ) + - pi("DL", nrOfPagesWithDl ) + - pi("TABLES", nrOfPagesWithTables ) + - pi("TEMPLATES", nrOfPagesWithTemplates ) + - pi("BOLD", nrOfPagesWithBold ) + - pi("ITALIC", nrOfPagesWithItalic ) + - pi("MATH", nrOfPagesWithMath ) + - pi("TAGS", nrOfPagesWithTag ) + - pi("NOWIKI", nrOfPagesWithNoWiki ) - ); - - System.out.println("longes Page:"+len_longestPage); - System.out.println("average length:"+len_allPages/nrOfAnalyzedPages ); - } -} diff --git a/de.tudarmstadt.ukp.wikipedia.parser/src/test/resources/log4j.properties b/de.tudarmstadt.ukp.wikipedia.parser/src/test/resources/log4j.properties deleted file mode 100644 index f80f73b3a..000000000 --- a/de.tudarmstadt.ukp.wikipedia.parser/src/test/resources/log4j.properties +++ /dev/null @@ -1,72 +0,0 @@ -# Licensed to the Technische Universität Darmstadt under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The Technische Universität Darmstadt -# licenses this file to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -### direct log messages to stdout ### -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.Target=System.out -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n - -### direct messages to file console.log ### -log4j.appender.file=org.apache.log4j.FileAppender -log4j.appender.file.File=console.log -log4j.appender.file.layout=org.apache.log4j.PatternLayout -log4j.appender.file.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n - -### set log levels - for more verbose logging change 'info' to 'debug' ### - -#log4j.rootLogger=info, file, stdout -log4j.rootLogger=info, stdout - -log4j.logger.de.tudarmstadt.ukp.wikipedia.api=warn -log4j.logger.de.tudarmstadt.ukp.wikipedia.util=warn - -log4j.logger.org.hibernate=warn -log4j.logger.org.hibernate.cfg=info -#log4j.logger.org.hibernate=info -#log4j.logger.org.hibernate=debug - -### log HQL query parser activity -#log4j.logger.org.hibernate.hql.ast.AST=debug - -### log just the SQL -#log4j.logger.org.hibernate.SQL=debug - -### log JDBC bind parameters ### -log4j.logger.org.hibernate.type=info -#log4j.logger.org.hibernate.type=debug - -### log schema export/update ### -log4j.logger.org.hibernate.tool.hbm2ddl=debug - -### log HQL parse trees -#log4j.logger.org.hibernate.hql=debug - -### log cache activity ### -#log4j.logger.org.hibernate.cache=debug - -### log transaction activity -#log4j.logger.org.hibernate.transaction=debug - -### log JDBC resource acquisition -#log4j.logger.org.hibernate.jdbc=debug - -### log only errors of internal JDBC connection provider -log4j.logger.org.hibernate.engine.jdbc.connections.internal=error - -### enable the following line if you want to track down connection ### -### leakages when using DriverManagerConnectionProvider ### -#log4j.logger.org.hibernate.connection.DriverManagerConnectionProvider=trace diff --git a/de.tudarmstadt.ukp.wikipedia.parser/src/test/resources/log4j2.xml b/de.tudarmstadt.ukp.wikipedia.parser/src/test/resources/log4j2.xml new file mode 100644 index 000000000..73cd3e041 --- /dev/null +++ b/de.tudarmstadt.ukp.wikipedia.parser/src/test/resources/log4j2.xml @@ -0,0 +1,31 @@ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/de.tudarmstadt.ukp.wikipedia.revisionmachine/pom.xml b/de.tudarmstadt.ukp.wikipedia.revisionmachine/pom.xml index 2fcb845ed..a85025724 100644 --- a/de.tudarmstadt.ukp.wikipedia.revisionmachine/pom.xml +++ b/de.tudarmstadt.ukp.wikipedia.revisionmachine/pom.xml @@ -20,7 +20,7 @@ de.tudarmstadt.ukp.wikipedia de.tudarmstadt.ukp.wikipedia - 1.2.0-SNAPSHOT + 2.0.0-SNAPSHOT 4.0.0 de.tudarmstadt.ukp.wikipedia.revisionmachine @@ -34,10 +34,10 @@ commons-codec commons-codec
- - org.apache.ant - ant - + + org.apache.commons + commons-compress + commons-lang commons-lang @@ -47,13 +47,13 @@ slf4j-api - org.slf4j - slf4j-log4j12 + org.apache.logging.log4j + log4j-slf4j-impl provided - log4j - log4j + org.apache.logging.log4j + log4j-core provided diff --git a/de.tudarmstadt.ukp.wikipedia.revisionmachine/src/main/java/de/tudarmstadt/ukp/wikipedia/revisionmachine/archivers/Bzip2Archiver.java b/de.tudarmstadt.ukp.wikipedia.revisionmachine/src/main/java/de/tudarmstadt/ukp/wikipedia/revisionmachine/archivers/Bzip2Archiver.java index 920ce214e..9249ed0f3 100644 --- a/de.tudarmstadt.ukp.wikipedia.revisionmachine/src/main/java/de/tudarmstadt/ukp/wikipedia/revisionmachine/archivers/Bzip2Archiver.java +++ b/de.tudarmstadt.ukp.wikipedia.revisionmachine/src/main/java/de/tudarmstadt/ukp/wikipedia/revisionmachine/archivers/Bzip2Archiver.java @@ -17,6 +17,10 @@ */ package de.tudarmstadt.ukp.wikipedia.revisionmachine.archivers; + +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; + import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.File; @@ -26,8 +30,6 @@ import java.io.InputStreamReader; import java.io.OutputStream; -import org.apache.tools.bzip2.CBZip2InputStream; -import org.apache.tools.bzip2.CBZip2OutputStream; /** * Class provides basic bzip2 compression/decompression functionality @@ -64,7 +66,7 @@ public void compress(String path) BufferedOutputStream bufStr = new BufferedOutputStream(fos); // added bzip2 prefix fos.write("BZ".getBytes()); - CBZip2OutputStream bzip2 = new CBZip2OutputStream(bufStr); + BZip2CompressorOutputStream bzip2 = new BZip2CompressorOutputStream(bufStr); while (input.available() > 0) { int size = COMPRESSION_CACHE; @@ -111,7 +113,7 @@ public OutputStream getCompressionStream(String path) // added bzip2 prefix fos.write("BZ".getBytes()); - CBZip2OutputStream bzip2 = new CBZip2OutputStream(bufStr); + BZip2CompressorOutputStream bzip2 = new BZip2CompressorOutputStream(bufStr); return bzip2; } @@ -138,7 +140,7 @@ public InputStreamReader getDecompressionStream(String path, String encoding) BufferedInputStream bufferedStream = new BufferedInputStream(fileStream); - CBZip2InputStream input = new CBZip2InputStream(bufferedStream); + BZip2CompressorInputStream input = new BZip2CompressorInputStream(bufferedStream); return new InputStreamReader(input, encoding); @@ -169,7 +171,7 @@ public void decompress(String path) BufferedInputStream buffStr = new BufferedInputStream(inputStr); - CBZip2InputStream input = new CBZip2InputStream(buffStr); + BZip2CompressorInputStream input = new BZip2CompressorInputStream(buffStr); FileOutputStream outStr = new FileOutputStream(unarchived); diff --git a/de.tudarmstadt.ukp.wikipedia.revisionmachine/src/main/java/de/tudarmstadt/ukp/wikipedia/revisionmachine/difftool/config/gui/panels/FilterPanel.java b/de.tudarmstadt.ukp.wikipedia.revisionmachine/src/main/java/de/tudarmstadt/ukp/wikipedia/revisionmachine/difftool/config/gui/panels/FilterPanel.java index dfaa6451b..da1b14473 100644 --- a/de.tudarmstadt.ukp.wikipedia.revisionmachine/src/main/java/de/tudarmstadt/ukp/wikipedia/revisionmachine/difftool/config/gui/panels/FilterPanel.java +++ b/de.tudarmstadt.ukp.wikipedia.revisionmachine/src/main/java/de/tudarmstadt/ukp/wikipedia/revisionmachine/difftool/config/gui/panels/FilterPanel.java @@ -102,7 +102,7 @@ private void initButtons() public void actionPerformed(ActionEvent arg0) { for (int i = 0; i < 22; i++) { - namespaces.getModel().setValueAt(new Boolean(true), i, 1); + namespaces.getModel().setValueAt(true, i, 1); } } @@ -119,7 +119,7 @@ public void actionPerformed(ActionEvent arg0) public void actionPerformed(ActionEvent e) { for (int i = 0; i < 22; i++) { - namespaces.getModel().setValueAt(new Boolean(false), i, 1); + namespaces.getModel().setValueAt(false, i, 1); } } @@ -175,11 +175,11 @@ public void applyConfig(ConfigSettings config) for (int j = 0; j < rows; j++) { if (namespaces.contains((this.namespaces.getModel().getValueAt( j, 2)))) { - this.namespaces.getModel().setValueAt(new Boolean(true), j, + this.namespaces.getModel().setValueAt(true, j, 1); } else { - this.namespaces.getModel().setValueAt(new Boolean(false), + this.namespaces.getModel().setValueAt(false, j, 1); } @@ -198,28 +198,28 @@ class FilterTableModel { private final String[] columnNames = { "Namespace", "Allow", "#" }; - private final Object[][] data = { { "main(0)", new Boolean(false), 0 }, - { "talk(1)", new Boolean(false), 1 }, - { "user(2)", new Boolean(false), 2 }, - { "user talk(3)", new Boolean(false), 3 }, - { "wikipedia(4)", new Boolean(false), 4 }, - { "wikipedia talk(5)", new Boolean(false), 5 }, - { "file(6)", new Boolean(false), 6 }, - { "file talk(7)", new Boolean(false), 7 }, - { "mediawiki(8)", new Boolean(false), 8 }, - { "mediawiki talk(9)", new Boolean(false), 9 }, - { "template(10)", new Boolean(false), 10 }, - { "template talk(11)", new Boolean(false), 11 }, - { "help(12)", new Boolean(false), 12 }, - { "help talk(13)", new Boolean(false), 13 }, - { "category(14)", new Boolean(false), 14 }, - { "category talk(15)", new Boolean(false), 15 }, - { "portal(100)", new Boolean(false), 100 }, - { "portal talk(101)", new Boolean(false), 101 }, - { "book(108)", new Boolean(false), 108 }, - { "book talk(109)", new Boolean(false), 109 }, - { "special(-1)", new Boolean(false), -1 }, - { "media(-2)", new Boolean(false), -2 } + private final Object[][] data = { { "main(0)", false, 0 }, + { "talk(1)", false, 1 }, + { "user(2)", false, 2 }, + { "user talk(3)", false, 3 }, + { "wikipedia(4)", false, 4 }, + { "wikipedia talk(5)", false, 5 }, + { "file(6)", false, 6 }, + { "file talk(7)", false, 7 }, + { "mediawiki(8)", false, 8 }, + { "mediawiki talk(9)", false, 9 }, + { "template(10)", false, 10 }, + { "template talk(11)", false, 11 }, + { "help(12)", false, 12 }, + { "help talk(13)", false, 13 }, + { "category(14)", false, 14 }, + { "category talk(15)", false, 15 }, + { "portal(100)", false, 100 }, + { "portal talk(101)", false, 101 }, + { "book(108)", false, 108 }, + { "book talk(109)", false, 109 }, + { "special(-1)", false, -1 }, + { "media(-2)", false, -2 } }; diff --git a/de.tudarmstadt.ukp.wikipedia.revisionmachine/src/main/resources/log4j.xml b/de.tudarmstadt.ukp.wikipedia.revisionmachine/src/main/resources/log4j2.xml similarity index 100% rename from de.tudarmstadt.ukp.wikipedia.revisionmachine/src/main/resources/log4j.xml rename to de.tudarmstadt.ukp.wikipedia.revisionmachine/src/main/resources/log4j2.xml diff --git a/de.tudarmstadt.ukp.wikipedia.revisionmachine/src/test/resources/db/wikiapi_simple_20090119_stripped.script b/de.tudarmstadt.ukp.wikipedia.revisionmachine/src/test/resources/db/wikiapi_simple_20090119_stripped.script index 7c2e2e5eb..fc22988a8 100644 --- a/de.tudarmstadt.ukp.wikipedia.revisionmachine/src/test/resources/db/wikiapi_simple_20090119_stripped.script +++ b/de.tudarmstadt.ukp.wikipedia.revisionmachine/src/test/resources/db/wikiapi_simple_20090119_stripped.script @@ -1,5 +1,4 @@ SET DATABASE UNIQUE NAME HSQLDB64408D16F3 -SET DATABASE GC 0 SET DATABASE DEFAULT RESULT MEMORY ROWS 0 SET DATABASE EVENT LOG LEVEL 0 SET DATABASE TRANSACTION CONTROL LOCKS @@ -7,18 +6,20 @@ SET DATABASE DEFAULT ISOLATION LEVEL READ COMMITTED SET DATABASE TRANSACTION ROLLBACK ON CONFLICT TRUE SET DATABASE TEXT TABLE DEFAULTS '' SET DATABASE SQL NAMES FALSE +SET DATABASE SQL RESTRICT EXEC FALSE SET DATABASE SQL REFERENCES FALSE SET DATABASE SQL SIZE FALSE SET DATABASE SQL TYPES FALSE SET DATABASE SQL TDC DELETE TRUE SET DATABASE SQL TDC UPDATE TRUE +SET DATABASE SQL SYS INDEX NAMES FALSE SET DATABASE SQL CONCAT NULLS TRUE SET DATABASE SQL UNIQUE NULLS TRUE SET DATABASE SQL CONVERT TRUNCATE TRUE SET DATABASE SQL AVG SCALE 0 SET DATABASE SQL DOUBLE NAN TRUE SET FILES WRITE DELAY 0 -SET FILES BACKUP INCREMENT FALSE +SET FILES BACKUP INCREMENT TRUE SET FILES CACHE SIZE 10000 SET FILES CACHE ROWS 50000 SET FILES SCALE 1 @@ -28,10 +29,10 @@ SET FILES NIO TRUE SET FILES NIO SIZE 256 SET FILES LOG TRUE SET FILES LOG SIZE 200 +SET FILES CHECK 229 SET DATABASE COLLATION "German" NO PAD CREATE USER SA PASSWORD DIGEST 'd41d8cd98f00b204e9800998ecf8427e' CREATE SCHEMA PUBLIC AUTHORIZATION DBA -SET SCHEMA PUBLIC CREATE MEMORY TABLE PUBLIC.CATEGORY(ID BIGINT GENERATED BY DEFAULT AS IDENTITY(START WITH 1) NOT NULL PRIMARY KEY,PAGEID INTEGER,NAME VARCHAR(255),UNIQUE(PAGEID)) ALTER TABLE PUBLIC.CATEGORY ALTER COLUMN ID RESTART WITH 18 CREATE INDEX NAMEINDEX ON PUBLIC.CATEGORY(NAME) @@ -61,11 +62,11 @@ CREATE MEMORY TABLE PUBLIC.REVISIONS(PRIMARYKEY BIGINT GENERATED BY DEFAULT AS I ALTER TABLE PUBLIC.REVISIONS ALTER COLUMN PRIMARYKEY RESTART WITH 71882 ALTER SEQUENCE SYSTEM_LOBS.LOB_ID RESTART WITH 1 SET DATABASE DEFAULT INITIAL SCHEMA PUBLIC -GRANT USAGE ON DOMAIN INFORMATION_SCHEMA.SQL_IDENTIFIER TO PUBLIC -GRANT USAGE ON DOMAIN INFORMATION_SCHEMA.YES_OR_NO TO PUBLIC -GRANT USAGE ON DOMAIN INFORMATION_SCHEMA.TIME_STAMP TO PUBLIC GRANT USAGE ON DOMAIN INFORMATION_SCHEMA.CARDINAL_NUMBER TO PUBLIC +GRANT USAGE ON DOMAIN INFORMATION_SCHEMA.YES_OR_NO TO PUBLIC GRANT USAGE ON DOMAIN INFORMATION_SCHEMA.CHARACTER_DATA TO PUBLIC +GRANT USAGE ON DOMAIN INFORMATION_SCHEMA.SQL_IDENTIFIER TO PUBLIC +GRANT USAGE ON DOMAIN INFORMATION_SCHEMA.TIME_STAMP TO PUBLIC GRANT DBA TO SA SET SCHEMA SYSTEM_LOBS INSERT INTO BLOCKS VALUES(0,2147483647,0) diff --git a/de.tudarmstadt.ukp.wikipedia.revisionmachine/src/test/resources/log4j.properties b/de.tudarmstadt.ukp.wikipedia.revisionmachine/src/test/resources/log4j.properties deleted file mode 100644 index 0094e004b..000000000 --- a/de.tudarmstadt.ukp.wikipedia.revisionmachine/src/test/resources/log4j.properties +++ /dev/null @@ -1,72 +0,0 @@ -# Licensed to the Technische Universität Darmstadt under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The Technische Universität Darmstadt -# licenses this file to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -### direct log messages to stdout ### -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.Target=System.out -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n - -### direct messages to file console.log ### -log4j.appender.file=org.apache.log4j.FileAppender -log4j.appender.file.File=console.log -log4j.appender.file.layout=org.apache.log4j.PatternLayout -log4j.appender.file.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n - -### set log levels - for more verbose logging change 'info' to 'debug' ### - -#log4j.rootLogger=info, file, stdout -log4j.rootLogger=info, stdout - -log4j.logger.de.tudarmstadt.ukp.wikipedia.api=warn -log4j.logger.de.tudarmstadt.ukp.wikipedia.util=warn -log4j.logger.de.tudarmstadt.ukp.wikipedia.revisionmachine=warn - -log4j.logger.hsqldb.db=warn - -log4j.logger.org.hibernate=warn -log4j.logger.org.hibernate.cfg=warn -#log4j.logger.org.hibernate=info -#log4j.logger.org.hibernate=debug - -### log HQL query parser activity -#log4j.logger.org.hibernate.hql.ast.AST=debug - -### log just the SQL -#log4j.logger.org.hibernate.SQL=debug - -### log JDBC bind parameters ### -log4j.logger.org.hibernate.type=info -#log4j.logger.org.hibernate.type=debug - -### log schema export/update ### -log4j.logger.org.hibernate.tool.hbm2ddl=debug - -### log HQL parse trees -#log4j.logger.org.hibernate.hql=debug - -### log cache activity ### -#log4j.logger.org.hibernate.cache=debug - -### log transaction activity -#log4j.logger.org.hibernate.transaction=debug - -### log JDBC resource acquisition -#log4j.logger.org.hibernate.jdbc=debug - -### enable the following line if you want to track down connection ### -### leakages when using DriverManagerConnectionProvider ### -#log4j.logger.org.hibernate.connection.DriverManagerConnectionProvider=trace diff --git a/de.tudarmstadt.ukp.wikipedia.revisionmachine/src/test/resources/log4j2.xml b/de.tudarmstadt.ukp.wikipedia.revisionmachine/src/test/resources/log4j2.xml new file mode 100644 index 000000000..73cd3e041 --- /dev/null +++ b/de.tudarmstadt.ukp.wikipedia.revisionmachine/src/test/resources/log4j2.xml @@ -0,0 +1,31 @@ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/de.tudarmstadt.ukp.wikipedia.timemachine/pom.xml b/de.tudarmstadt.ukp.wikipedia.timemachine/pom.xml index 51e05ee3c..14d9c23a9 100644 --- a/de.tudarmstadt.ukp.wikipedia.timemachine/pom.xml +++ b/de.tudarmstadt.ukp.wikipedia.timemachine/pom.xml @@ -20,7 +20,7 @@ de.tudarmstadt.ukp.wikipedia de.tudarmstadt.ukp.wikipedia - 1.2.0-SNAPSHOT + 2.0.0-SNAPSHOT 4.0.0 de.tudarmstadt.ukp.wikipedia.timemachine @@ -30,27 +30,27 @@ de.tudarmstadt.ukp.wikipedia de.tudarmstadt.ukp.wikipedia.wikimachine - - net.sf.trove4j - trove4j - de.tudarmstadt.ukp.wikipedia de.tudarmstadt.ukp.wikipedia.mwdumper + + it.unimi.dsi + fastutil-core + org.slf4j slf4j-api - org.slf4j - slf4j-log4j12 + org.apache.logging.log4j + log4j-slf4j-impl provided - log4j - log4j + org.apache.logging.log4j + log4j-core provided diff --git a/de.tudarmstadt.ukp.wikipedia.timemachine/src/main/java/de/tudarmstadt/ukp/wikipedia/timemachine/dump/version/DumpVersionTroveIntKey.java b/de.tudarmstadt.ukp.wikipedia.timemachine/src/main/java/de/tudarmstadt/ukp/wikipedia/timemachine/dump/version/DumpVersionFastUtilIntKey.java similarity index 92% rename from de.tudarmstadt.ukp.wikipedia.timemachine/src/main/java/de/tudarmstadt/ukp/wikipedia/timemachine/dump/version/DumpVersionTroveIntKey.java rename to de.tudarmstadt.ukp.wikipedia.timemachine/src/main/java/de/tudarmstadt/ukp/wikipedia/timemachine/dump/version/DumpVersionFastUtilIntKey.java index 787e1d5b9..06427ed3e 100644 --- a/de.tudarmstadt.ukp.wikipedia.timemachine/src/main/java/de/tudarmstadt/ukp/wikipedia/timemachine/dump/version/DumpVersionTroveIntKey.java +++ b/de.tudarmstadt.ukp.wikipedia.timemachine/src/main/java/de/tudarmstadt/ukp/wikipedia/timemachine/dump/version/DumpVersionFastUtilIntKey.java @@ -20,7 +20,9 @@ import java.io.IOException; import java.sql.Timestamp; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; +import java.util.Set; import de.tudarmstadt.ukp.wikipedia.timemachine.domain.Revision; import de.tudarmstadt.ukp.wikipedia.wikimachine.dump.sql.CategorylinksParser; @@ -32,10 +34,11 @@ import de.tudarmstadt.ukp.wikipedia.wikimachine.util.Redirects; import de.tudarmstadt.ukp.wikipedia.wikimachine.util.TimestampUtil; import de.tudarmstadt.ukp.wikipedia.wikimachine.util.TxtFileWriter; -import gnu.trove.map.hash.TIntIntHashMap; -import gnu.trove.set.hash.TIntHashSet; +import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap; +import it.unimi.dsi.fastutil.ints.IntArraySet; +import it.unimi.dsi.fastutil.ints.IntSet; -public class DumpVersionTroveIntKey extends AbstractDumpVersion { +public class DumpVersionFastUtilIntKey extends AbstractDumpVersion { private static final String SQL_NULL = "NULL"; /** * maps page id's to Revision objects @@ -45,16 +48,16 @@ public class DumpVersionTroveIntKey extends AbstractDumpVersion { * after revision parsing the map will be erased and the keys sorted in the * array list */ - private TIntHashSet pageIdRevList; + private IntSet pageIdRevList; /** * caches the page id's of disambiguation pages. */ - private TIntHashSet disambiguations; + private IntSet disambiguations; /** * maps text id's to the page id's. */ - private TIntIntHashMap textIdPageIdMap; + private Int2IntOpenHashMap textIdPageIdMap; /** * maps page id's of pages to their names */ @@ -62,12 +65,12 @@ public class DumpVersionTroveIntKey extends AbstractDumpVersion { /** * maps names of pages to their page id's. */ - private TIntIntHashMap pNamePageIdMap; + private Int2IntOpenHashMap pNamePageIdMap; /** * maps names of categories to their page id's. */ - private TIntIntHashMap cNamePageIdMap; + private Int2IntOpenHashMap cNamePageIdMap; /** * maps page id's of redirects to their names. @@ -100,7 +103,7 @@ public void freeAfterPageParsing() { @Override public void freeAfterRevisonParsing() { - pageIdRevList = new TIntHashSet(pageIdRevMap.keySet().size()); + pageIdRevList = new IntArraySet(pageIdRevMap.keySet().size()); for (Integer key : pageIdRevMap.keySet()) { pageIdRevList.add(key); } @@ -127,22 +130,22 @@ public void initialize(Timestamp timestamp) { /** * filled in revisions */ - pageIdRevMap = new HashMap(); - textIdPageIdMap = new TIntIntHashMap(); + pageIdRevMap = new HashMap<>(); + textIdPageIdMap = new Int2IntOpenHashMap(); /** * filled in pages */ - pPageIdNameMap = new HashMap(); - pNamePageIdMap = new TIntIntHashMap(); + pPageIdNameMap = new HashMap<>(); + pNamePageIdMap = new Int2IntOpenHashMap(); - cNamePageIdMap = new TIntIntHashMap(); - rPageIdNameMap = new HashMap(); + cNamePageIdMap = new Int2IntOpenHashMap(); + rPageIdNameMap = new HashMap<>(); /** * filled in categories */ - disambiguations = new TIntHashSet(); + disambiguations = new IntArraySet(); } @Override diff --git a/de.tudarmstadt.ukp.wikipedia.timemachine/src/main/java/de/tudarmstadt/ukp/wikipedia/timemachine/dump/version/DumpVersionJDKGeneric.java b/de.tudarmstadt.ukp.wikipedia.timemachine/src/main/java/de/tudarmstadt/ukp/wikipedia/timemachine/dump/version/DumpVersionJDKGeneric.java index 21194df05..2aee86aad 100644 --- a/de.tudarmstadt.ukp.wikipedia.timemachine/src/main/java/de/tudarmstadt/ukp/wikipedia/timemachine/dump/version/DumpVersionJDKGeneric.java +++ b/de.tudarmstadt.ukp.wikipedia.timemachine/src/main/java/de/tudarmstadt/ukp/wikipedia/timemachine/dump/version/DumpVersionJDKGeneric.java @@ -20,7 +20,9 @@ import java.io.IOException; import java.sql.Timestamp; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; +import java.util.Set; import de.tudarmstadt.ukp.wikipedia.timemachine.domain.Revision; import de.tudarmstadt.ukp.wikipedia.wikimachine.dump.sql.CategorylinksParser; @@ -33,8 +35,6 @@ import de.tudarmstadt.ukp.wikipedia.wikimachine.util.Redirects; import de.tudarmstadt.ukp.wikipedia.wikimachine.util.TimestampUtil; import de.tudarmstadt.ukp.wikipedia.wikimachine.util.TxtFileWriter; -import gnu.trove.map.hash.TIntIntHashMap; -import gnu.trove.set.hash.TIntHashSet; /** * Please be sure, that {@code hashCode(String)} of the provided HashAlgorithm type returns the @@ -60,16 +60,16 @@ public class DumpVersionJDKGeneric pageIdRevList; /** * caches the page id's of disambiguation pages. */ - private TIntHashSet disambiguations; + private Set disambiguations; /** * maps text id's to the page id's. */ - private TIntIntHashMap textIdPageIdMap; + private Map textIdPageIdMap; /** * maps page id's of pages to their names */ @@ -123,7 +123,7 @@ public void freeAfterPageParsing() { @Override public void freeAfterRevisonParsing() { - pageIdRevList = new TIntHashSet(pageIdRevMap.keySet().size()); + pageIdRevList = new HashSet<>(pageIdRevMap.keySet().size()); for (Integer key : pageIdRevMap.keySet()) { pageIdRevList.add(key); } @@ -151,7 +151,7 @@ public void initialize(Timestamp timestamp) { * filled in revisions */ pageIdRevMap = new HashMap(); - textIdPageIdMap = new TIntIntHashMap(); + textIdPageIdMap = new HashMap<>(); /** * filled in pages @@ -165,7 +165,7 @@ public void initialize(Timestamp timestamp) { /** * filled in categories */ - disambiguations = new TIntHashSet(); + disambiguations = new HashSet<>(); } @SuppressWarnings("unchecked") diff --git a/de.tudarmstadt.ukp.wikipedia.timemachine/src/main/resources/context/applicationContext.xml b/de.tudarmstadt.ukp.wikipedia.timemachine/src/main/resources/context/applicationContext.xml index 39c4c9cc6..6b8ed53d4 100644 --- a/de.tudarmstadt.ukp.wikipedia.timemachine/src/main/resources/context/applicationContext.xml +++ b/de.tudarmstadt.ukp.wikipedia.timemachine/src/main/resources/context/applicationContext.xml @@ -66,7 +66,7 @@ de.tudarmstadt.ukp.wikipedia.timemachine.dump.version.DumpVersionJDKIntKeyFactory (bean id="dumpVersionFactory") de.tudarmstadt.ukp.wikipedia.timemachine.dump.version.DumpVersionJDKLongKeyFactory (bean id="dumpVersionFactory") de.tudarmstadt.ukp.wikipedia.timemachine.dump.version.DumpVersionJDKStringKeyFactory (bean id="dumpVersionFactory") - de.tudarmstadt.ukp.wikipedia.timemachine.dump.version.DumpVersionTroveIntKey (bean id="dumpVersion" without factory) [default] + de.tudarmstadt.ukp.wikipedia.timemachine.dump.version.DumpVersionFastUtilIntKey (bean id="dumpVersion" without factory) [default] de.tudarmstadt.ukp.wikipedia.timemachine.dump.version.OriginalDumpVersion (bean id="dumpVersion" without factory) --> @@ -91,7 +91,7 @@ --> - + diff --git a/de.tudarmstadt.ukp.wikipedia.timemachine/src/main/resources/log4j.xml b/de.tudarmstadt.ukp.wikipedia.timemachine/src/main/resources/log4j.xml deleted file mode 100644 index 0fdb83952..000000000 --- a/de.tudarmstadt.ukp.wikipedia.timemachine/src/main/resources/log4j.xml +++ /dev/null @@ -1,61 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/de.tudarmstadt.ukp.wikipedia.timemachine/src/main/resources/log4j2.xml b/de.tudarmstadt.ukp.wikipedia.timemachine/src/main/resources/log4j2.xml new file mode 100644 index 000000000..73cd3e041 --- /dev/null +++ b/de.tudarmstadt.ukp.wikipedia.timemachine/src/main/resources/log4j2.xml @@ -0,0 +1,31 @@ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/de.tudarmstadt.ukp.wikipedia.tutorial/pom.xml b/de.tudarmstadt.ukp.wikipedia.tutorial/pom.xml index 6f9ebc77e..6c5a47933 100644 --- a/de.tudarmstadt.ukp.wikipedia.tutorial/pom.xml +++ b/de.tudarmstadt.ukp.wikipedia.tutorial/pom.xml @@ -21,12 +21,12 @@ de.tudarmstadt.ukp.wikipedia de.tudarmstadt.ukp.wikipedia - 1.2.0-SNAPSHOT + 2.0.0-SNAPSHOT 4.0.0 de.tudarmstadt.ukp.wikipedia.tutorial - 1.2.0-SNAPSHOT + 2.0.0-SNAPSHOT de.tudarmstadt.ukp.wikipedia diff --git a/de.tudarmstadt.ukp.wikipedia.util/pom.xml b/de.tudarmstadt.ukp.wikipedia.util/pom.xml index eaa0e0ac2..96604e53b 100644 --- a/de.tudarmstadt.ukp.wikipedia.util/pom.xml +++ b/de.tudarmstadt.ukp.wikipedia.util/pom.xml @@ -20,7 +20,7 @@ de.tudarmstadt.ukp.wikipedia de.tudarmstadt.ukp.wikipedia - 1.2.0-SNAPSHOT + 2.0.0-SNAPSHOT de.tudarmstadt.ukp.wikipedia.util JWPL API and RevisionAPI Utilities @@ -49,10 +49,21 @@ commons-logging commons-logging - - org.sweble.wikitext - swc-engine - + + de.tudarmstadt.ukp.wikipedia + jwpl-swc-engine-shade + ${project.version} + + + org.sweble.wikitext + swc-engine + + + org.sweble.wikitext + swc-parser-lazy + + + de.fau.cs.osr.ptk ptk-common diff --git a/de.tudarmstadt.ukp.wikipedia.util/src/main/java/de/tudarmstadt/ukp/wikipedia/util/templates/parser/ParseUtils.java b/de.tudarmstadt.ukp.wikipedia.util/src/main/java/de/tudarmstadt/ukp/wikipedia/util/templates/parser/ParseUtils.java index 5b7044ec6..89b451a52 100644 --- a/de.tudarmstadt.ukp.wikipedia.util/src/main/java/de/tudarmstadt/ukp/wikipedia/util/templates/parser/ParseUtils.java +++ b/de.tudarmstadt.ukp.wikipedia.util/src/main/java/de/tudarmstadt/ukp/wikipedia/util/templates/parser/ParseUtils.java @@ -22,8 +22,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import javax.xml.bind.JAXBException; - +import jakarta.xml.bind.JAXBException; import org.sweble.wikitext.engine.config.WikiConfig; import org.sweble.wikitext.engine.EngineException; import org.sweble.wikitext.engine.nodes.EngProcessedPage; @@ -49,7 +48,7 @@ public class ParseUtils * @param revision the revision id * @return list of ExtractedSections */ - public static List getSections(String text, String title, long revision) throws LinkTargetException, EngineException, FileNotFoundException, JAXBException{ + public static List getSections(String text, String title, long revision) throws LinkTargetException, EngineException, FileNotFoundException, JAXBException { return (List) parsePage(new SectionExtractor(), text, title, revision); } diff --git a/de.tudarmstadt.ukp.wikipedia.wikimachine/pom.xml b/de.tudarmstadt.ukp.wikipedia.wikimachine/pom.xml index 34d2f6511..021d17778 100644 --- a/de.tudarmstadt.ukp.wikipedia.wikimachine/pom.xml +++ b/de.tudarmstadt.ukp.wikipedia.wikimachine/pom.xml @@ -20,20 +20,12 @@ de.tudarmstadt.ukp.wikipedia de.tudarmstadt.ukp.wikipedia - 1.2.0-SNAPSHOT + 2.0.0-SNAPSHOT 4.0.0 de.tudarmstadt.ukp.wikipedia.wikimachine WikiMachine - - org.apache.ant - ant - - - javax.mail - mail - org.springframework spring-core diff --git a/de.tudarmstadt.ukp.wikipedia.wikimachine/src/main/java/context/applicationContextTemplate.xml b/de.tudarmstadt.ukp.wikipedia.wikimachine/src/main/java/context/applicationContextTemplate.xml index e489336ae..7138120b5 100644 --- a/de.tudarmstadt.ukp.wikipedia.wikimachine/src/main/java/context/applicationContextTemplate.xml +++ b/de.tudarmstadt.ukp.wikipedia.wikimachine/src/main/java/context/applicationContextTemplate.xml @@ -80,7 +80,7 @@ de.tudarmstadt.ukp.wikipedia.timemachine.dump.version.DumpVersionJDKIntKeyFactory (bean id="dumpVersionFactory") de.tudarmstadt.ukp.wikipedia.timemachine.dump.version.DumpVersionJDKLongKeyFactory (bean id="dumpVersionFactory") de.tudarmstadt.ukp.wikipedia.timemachine.dump.version.DumpVersionJDKStringKeyFactory (bean id="dumpVersionFactory") - de.tudarmstadt.ukp.wikipedia.timemachine.dump.version.DumpVersionTroveIntKey (bean id="dumpVersion" without factory) [default] + de.tudarmstadt.ukp.wikipedia.timemachine.dump.version.DumpVersionFastUtilIntKey (bean id="dumpVersion" without factory) [default] de.tudarmstadt.ukp.wikipedia.timemachine.dump.version.OriginalDumpVersion (bean id="dumpVersion" without factory) --> diff --git a/de.tudarmstadt.ukp.wikipedia.wikimachine/src/main/java/de/tudarmstadt/ukp/wikipedia/wikimachine/debug/MailMemoryLogger.java b/de.tudarmstadt.ukp.wikipedia.wikimachine/src/main/java/de/tudarmstadt/ukp/wikipedia/wikimachine/debug/MailMemoryLogger.java deleted file mode 100644 index bde2b3628..000000000 --- a/de.tudarmstadt.ukp.wikipedia.wikimachine/src/main/java/de/tudarmstadt/ukp/wikipedia/wikimachine/debug/MailMemoryLogger.java +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Licensed to the Technische Universität Darmstadt under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The Technische Universität Darmstadt - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package de.tudarmstadt.ukp.wikipedia.wikimachine.debug; - -import java.lang.invoke.MethodHandles; -import java.text.SimpleDateFormat; -import java.util.Calendar; -import java.util.Date; -import java.util.Properties; - -import javax.mail.Message; -import javax.mail.MessagingException; -import javax.mail.Session; -import javax.mail.Transport; -import javax.mail.internet.InternetAddress; -import javax.mail.internet.MimeMessage; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A logger implementation which directs its logging output an SMTP mail server endpoint. - * - * @deprecated Don't use this in production code/environments as there are open runtime issues with the SMTP config. - */ -@Deprecated -public class MailMemoryLogger extends AbstractLogger { - - private static final Logger logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - private static final String DATE_FORMAT_NOW = "yyyy.MM.dd HH:mm:ss"; - - // TODO Do these hard-coded values make sense here? - private static final String ADDRESS_TO = "mail.logger.system@googlemail.com"; - private static final String ADDRESS_FROM = "i_galkin@rbg.informatik.tu-darmstadt.de"; - - /** - * For configuration reference: - * @see 0) { - send(); - } - super.finalize(); - } - - @Override - public void logObject(Object message) { - appendRuntimeInfo(); - messageBuffer.append(message); - messageBuffer.append("\n"); - long timeStamp = System.currentTimeMillis(); - if (++messageCount > MESSAGES_MAX || (timeStamp - lastSend) > LASTSEND_MAX) { - - send(); - messageCount = 0; - lastSend = timeStamp; - messageBuffer.setLength(0); - - } - } -} diff --git a/de.tudarmstadt.ukp.wikipedia.wikimachine/src/main/java/de/tudarmstadt/ukp/wikipedia/wikimachine/decompression/BZip2Decompressor.java b/de.tudarmstadt.ukp.wikipedia.wikimachine/src/main/java/de/tudarmstadt/ukp/wikipedia/wikimachine/decompression/BZip2Decompressor.java index a02aa0f7d..7b919cedd 100644 --- a/de.tudarmstadt.ukp.wikipedia.wikimachine/src/main/java/de/tudarmstadt/ukp/wikipedia/wikimachine/decompression/BZip2Decompressor.java +++ b/de.tudarmstadt.ukp.wikipedia.wikimachine/src/main/java/de/tudarmstadt/ukp/wikipedia/wikimachine/decompression/BZip2Decompressor.java @@ -17,13 +17,13 @@ */ package de.tudarmstadt.ukp.wikipedia.wikimachine.decompression; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; + import java.io.BufferedInputStream; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; -import org.apache.tools.bzip2.CBZip2InputStream; - /** * BZip2 Decompressor (based on Singleton Design Pattern). Uses getInputStream * to set up the archive path and returns the InputStream to read from @@ -44,7 +44,7 @@ public InputStream getInputStream(String fileName) throws IOException { * /pkg/bzip2 /CBZip2InputStream.html */ inputStream.skip(2); - outputStream = new CBZip2InputStream(inputStream); + outputStream = new BZip2CompressorInputStream(inputStream); return outputStream; diff --git a/de.tudarmstadt.ukp.wikipedia.wikimachine/src/test/resources/log4j.properties b/de.tudarmstadt.ukp.wikipedia.wikimachine/src/test/resources/log4j.properties deleted file mode 100644 index 1f62585b2..000000000 --- a/de.tudarmstadt.ukp.wikipedia.wikimachine/src/test/resources/log4j.properties +++ /dev/null @@ -1,17 +0,0 @@ -log4j.rootLogger=DEBUG, logfile, stdout - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.threshold=info -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -#log4j.appender.stdout.layout.ConversionPattern=%d [%t] %-5p %c - %m%n -log4j.appender.stdout.layout.ConversionPattern=%m%n -log4j.appender.stdout.threshold=info - - - -log4j.appender.logfile=org.apache.log4j.RollingFileAppender -#log4j.appender.logfile.MaxFileSize=1000KB -#log4j.appender.logfile.MaxBackupIndex=10 -log4j.appender.logfile.File=log4j.log -log4j.appender.logfile.layout=org.apache.log4j.PatternLayout -log4j.appender.logfile.layout.ConversionPattern=%d [%t] %-5p %c - %m%n \ No newline at end of file diff --git a/de.tudarmstadt.ukp.wikipedia.wikimachine/src/test/resources/log4j2.xml b/de.tudarmstadt.ukp.wikipedia.wikimachine/src/test/resources/log4j2.xml new file mode 100644 index 000000000..73cd3e041 --- /dev/null +++ b/de.tudarmstadt.ukp.wikipedia.wikimachine/src/test/resources/log4j2.xml @@ -0,0 +1,31 @@ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/jwpl-deps/jwpl-swc-engine-shade/pom.xml b/jwpl-deps/jwpl-swc-engine-shade/pom.xml new file mode 100644 index 000000000..d74b1a9bf --- /dev/null +++ b/jwpl-deps/jwpl-swc-engine-shade/pom.xml @@ -0,0 +1,147 @@ + + + + de.tudarmstadt.ukp.wikipedia + jwpl-deps + 2.0.0-SNAPSHOT + + 4.0.0 + jwpl-swc-engine-shade + Dependencies :: Jakarta :: swc-engine + + + + org.sweble.wikitext + swc-engine + ${org.sweble.wikitext.version} + + + commons-collections + commons-collections + 3.2.2 + + + commons-io + commons-io + 1.4 + + + de.fau.cs.osr.ptk + ptk-common + + + de.fau.cs.osr.utils + utils + + + org.apache.commons + commons-lang3 + 3.4 + + + org.slf4j + slf4j-api + + + org.sweble.wikitext + swc-parser-lazy + + + xtc + rats-runtime + 1.15.0 + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 3.5.1 + + + package + + shade + + + true + true + false + + + org.sweble.wikitext:swc-engine:* + + + + + + + + javax.xml.bind + jakarta.xml.bind + + + + + + + + + org.codehaus.mojo + build-helper-maven-plugin + 3.4.0 + + + workaround-makeItVisibleOnIntellij + none + + attach-artifact + + + + + ${project.build.directory}/${project.build.finalName}.jar + jar + optional + + + + + + + + + + + \ No newline at end of file diff --git a/jwpl-deps/pom.xml b/jwpl-deps/pom.xml new file mode 100644 index 000000000..e7902a671 --- /dev/null +++ b/jwpl-deps/pom.xml @@ -0,0 +1,33 @@ + + + + de.tudarmstadt.ukp.wikipedia + de.tudarmstadt.ukp.wikipedia + 2.0.0-SNAPSHOT + + 4.0.0 + jwpl-deps + Dependencies :: Jakarta + pom + + + jwpl-swc-engine-shade + + \ No newline at end of file diff --git a/pom.xml b/pom.xml index e6a4baf7b..829ba42ae 100644 --- a/pom.xml +++ b/pom.xml @@ -23,7 +23,7 @@ A free, Java-based application programming interface that allows to access all information contained in Wikipedia. de.tudarmstadt.ukp.wikipedia de.tudarmstadt.ukp.wikipedia - 1.2.0-SNAPSHOT + 2.0.0-SNAPSHOT org.dkpro dkpro-parent-pom @@ -31,23 +31,23 @@ UTF-8 - 1.2.0-SNAPSHOT + 2.0.0-SNAPSHOT 3.0.8 3.0.8 3.1.9 - 4.3.17.RELEASE + 5.3.30 - 5.2.17.Final + 6.1.7.Final 5.1.44 - 42.2.2 - 2.4.0 - 0.9.5.2 + 3.2.0 + 42.6.0 + 2.7.2 - 1.7.21 - 1.2.16 + 1.7.30 + 2.21.0 2.21.0 @@ -95,7 +95,7 @@ junit junit - 4.11 + 4.13.2 org.slf4j @@ -103,72 +103,41 @@ ${slf4j.version} - org.slf4j - slf4j-log4j12 - ${slf4j.version} - provided - - - log4j - log4j - ${log4j.version} - provided - - - jms - javax.jms - - - jmxtools - com.sun.jdmk - - - jmxri - com.sun.jmx - - - - - org.hibernate - hibernate-core - ${hibernate.version} + org.apache.logging.log4j + log4j-slf4j-impl + ${log4j2.version} - org.hibernate - hibernate-c3p0 - ${hibernate.version} - runtime + org.apache.logging.log4j + log4j-core + ${log4j2.version} - com.mchange - c3p0 - ${c3p0.version} - runtime + org.hibernate.orm + hibernate-core + ${hibernate.version} org.jgrapht - jgrapht-jdk1.5 - 0.7.3 - - - net.sf.trove4j - trove4j - 3.0.2 + jgrapht-core + 1.5.2 com.neovisionaries nv-i18n - 1.23 + 1.29 mysql mysql-connector-java ${mysql.version} + runtime - org.postgresql - postgresql - ${postgresql.version} + org.mariadb.jdbc + mariadb-java-client + ${mariadb.version} + runtime org.hsqldb @@ -176,21 +145,6 @@ ${hsqldb.version} test - - jfree - jfreechart - 1.0.12 - - - org.apache.ant - ant - 1.8.3 - - - javax.mail - mail - 1.4.1 - org.springframework spring-core @@ -209,17 +163,22 @@ commons-codec commons-codec - 1.6 + 1.16.0 + + + org.apache.commons + commons-compress + 1.24.0 commons-logging commons-logging - 1.1.1 + 1.2 - net.java.dev.swing-layout - swing-layout - 1.0.2 + it.unimi.dsi + fastutil-core + 8.5.12 de.tudarmstadt.ukp.wikipedia @@ -271,11 +230,6 @@ swc-parser-lazy ${org.sweble.wikitext.version} - - org.sweble.wikitext - swc-engine - ${org.sweble.wikitext.version} - org.sweble.wom3 sweble-wom3-parent @@ -299,7 +253,7 @@ jakarta.xml.bind jakarta.xml.bind-api - 2.3.3 + 4.0.0 @@ -359,6 +313,7 @@ de.tudarmstadt.ukp.wikipedia.util de.tudarmstadt.ukp.wikipedia.parser de.tudarmstadt.ukp.wikipedia.tutorial + jwpl-deps GitHub