0) {
- temp = (a*10000)/nr;
- }
- else {
- temp = 0;
- }
- return temp/100+"."+(temp/10)%10+""+temp%10+"%";
- }
-}
diff --git a/de.tudarmstadt.ukp.wikipedia.parser/src/main/java/de/tudarmstadt/ukp/wikipedia/parser/statistics/Statistics2.java b/de.tudarmstadt.ukp.wikipedia.parser/src/main/java/de/tudarmstadt/ukp/wikipedia/parser/statistics/Statistics2.java
deleted file mode 100644
index a8444e76e..000000000
--- a/de.tudarmstadt.ukp.wikipedia.parser/src/main/java/de/tudarmstadt/ukp/wikipedia/parser/statistics/Statistics2.java
+++ /dev/null
@@ -1,216 +0,0 @@
-/*
- * Licensed to the Technische Universität Darmstadt under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The Technische Universität Darmstadt
- * licenses this file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package de.tudarmstadt.ukp.wikipedia.parser.statistics;
-
-import java.util.Date;
-import java.util.Iterator;
-
-import de.tudarmstadt.ukp.wikipedia.api.DatabaseConfiguration;
-import de.tudarmstadt.ukp.wikipedia.api.Page;
-import de.tudarmstadt.ukp.wikipedia.api.WikiConstants.Language;
-import de.tudarmstadt.ukp.wikipedia.api.Wikipedia;
-import de.tudarmstadt.ukp.wikipedia.parser.Content.FormatType;
-import de.tudarmstadt.ukp.wikipedia.parser.ParsedPage;
-import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParser;
-import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParserFactory;
-import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.ShowTemplateNamesAndParameters;
-
-/**
- * Creates a little Statistic about occurence of MediaWiki Elements...
- *
- * Results for 15.05.2006 Database:
- * SUBS: 279896 74.19%
- * NL: 255511 67.72%
- * DL: 1679 0.44%
- * TABLES: 64967 17.22%
- * TEMPLATES: 215022 56.99%
- * BOLD: 364484 96.61%
- * ITALIC: 231877 61.46%
- * MATH: 6499 1.72%
- * TAGS: 74236 19.67%
- * NOWIKI: 3058 0.81%
- *
- */
-public class Statistics2 {
-
- // Variables
- static int nrOfPages;
-
- static int nrOfPagesWithNl;
- static int nrOfPagesWithDl;
- static int nrOfPagesWithBold;
- static int nrOfPagesWithItalic;
- static int nrOfPagesWithMath;
- static int nrOfPagesWithTag;
- static int nrOfPagesWithNoWiki;
- static int nrOfPagesWithTables;
- static int nrOfPagesWithSubSections;
- static int nrOfPagesWithTemplates;
-
- static int len_longestPage;
- static long len_allPages;
-
- static int nrOfAnalyzedPages;
-
- // Debug
- static final int skipPages = 0;
- static final long offsetTime = 0; //1000 Sec/65536 Pages
- static final boolean debug = false;
- static final boolean savFiles = false;
-
- public static void main( String[] argv) throws Exception{
- // configure the database connection parameters
- DatabaseConfiguration dbConfig = new DatabaseConfiguration();
- dbConfig.setDatabase("wikiapi_de");
- dbConfig.setHost("bender.ukp.informatik.tu-darmstadt.de");
- dbConfig.setUser("student");
- dbConfig.setPassword("student");
- dbConfig.setLanguage(Language.german);
-
- Wikipedia wiki = new Wikipedia(dbConfig);
-
- MediaWikiParserFactory pf = new MediaWikiParserFactory( );
-
- pf.setTemplateParserClass( ShowTemplateNamesAndParameters.class );
- pf.setShowImageText( true );
- pf.setShowMathTagContent( true );
- pf.setDeleteTags( false );
- pf.getImageIdentifers().add("IMAGE");
- pf.setCalculateSrcSpans( false );
-
- MediaWikiParser parser = pf.createParser();
-
- System.out.println( parser.configurationInfo() );
-
- Iterator pageIt = wiki.getArticles().iterator();
-
- nrOfPages = 0;
- nrOfPagesWithNl = 0;
- nrOfPagesWithDl = 0;
- nrOfPagesWithBold = 0;
- nrOfPagesWithItalic = 0;
- nrOfPagesWithMath = 0;
- nrOfPagesWithTag = 0;
- nrOfPagesWithTables = 0;
- nrOfPagesWithSubSections = 0;
- nrOfPagesWithTemplates = 0;
- nrOfPagesWithNoWiki = 0;
- len_allPages = 0;
- len_longestPage = 0;
-
- nrOfAnalyzedPages = 0;
-
- long startTime = new Date().getTime();
-
- System.out.println("START OF ANALYSATION");
- while (pageIt.hasNext()) {
- Page currentPage = pageIt.next();
- nrOfAnalyzedPages++;
-
- //For Debugging purposes...
- if( nrOfAnalyzedPages < skipPages+1 ){
- System.out.println("Skipped: "+ currentPage.getPageId());
- continue;
- }
-
- //Parsing
- String name = currentPage.getTitle().getPlainTitle();
- String src = currentPage.getText();
-
- if(debug) System.out.println( " "+currentPage.getPageId()+" "+name );
-
- ParsedPage pp = parser.parse(src);
-
- if(pp==null){
- // this is an Error, wich occures when src=""
- continue;
- }
-
- pp.setName(name);
-
- //ANALYSIS
-
- int len_page = src.length();
- if( len_page > len_longestPage ) len_longestPage = len_page;
- len_allPages += len_page;
-
- if( pp.nrOfDefinitionLists() != 0 )nrOfPagesWithDl++;
- if( pp.nrOfNestedLists() != 0 )nrOfPagesWithNl++;
- if( pp.nrOfTables() != 0 ) nrOfPagesWithTables++;
- if( pp.getTemplates().size() != 0 ) nrOfPagesWithTemplates++;
- if( pp.getSections().size()>1 )nrOfPagesWithSubSections++;
-
- for( FormatType ft: pp.getFormats() ){
- if( ft == FormatType.BOLD ) nrOfPagesWithBold++;
- if( ft == FormatType.ITALIC ) nrOfPagesWithItalic++;
- if( ft == FormatType.NOWIKI ) nrOfPagesWithNoWiki++;
- if( ft == FormatType.MATH ) nrOfPagesWithMath++;
- if( ft == FormatType.TAG ) nrOfPagesWithTag++;
- }
-
- //Screen Info
- if( nrOfAnalyzedPages % 1024 == 0 ){
- long aktualTime = new Date().getTime();
- long runnedTime = aktualTime - startTime + offsetTime;
- long totalTime = (runnedTime * nrOfPages) / nrOfAnalyzedPages;
-
- System.out.println(
- percentString(nrOfAnalyzedPages,nrOfPages)+
- " -> "+nrOfAnalyzedPages+" of "+nrOfPages+" pages in "+ runnedTime/1000+"sec"+
- " -> "+(totalTime-runnedTime)/60000+"min left"
- );
-
- screenInfo();
-
- System.out.println();
- }
-
- // if( nrOfAnalyzedPages == 1000 ) break;
- }
- System.out.println("END OF ANALYSATION");
- screenInfo();
-
- }
-
- private static String percentString( long a, long nr){
- long temp = (a*10000)/nr;
- return temp/100+"."+(temp/10)%10+""+temp%10+"%";
- }
-
- private static String pi( String about, int what ){
- return " "+about+": "+what+" "+percentString(what,nrOfAnalyzedPages)+"\n";
- }
-
- private static void screenInfo(){
- System.out.print(
- pi("SUBS",nrOfPagesWithSubSections) +
- pi("NL", nrOfPagesWithNl ) +
- pi("DL", nrOfPagesWithDl ) +
- pi("TABLES", nrOfPagesWithTables ) +
- pi("TEMPLATES", nrOfPagesWithTemplates ) +
- pi("BOLD", nrOfPagesWithBold ) +
- pi("ITALIC", nrOfPagesWithItalic ) +
- pi("MATH", nrOfPagesWithMath ) +
- pi("TAGS", nrOfPagesWithTag ) +
- pi("NOWIKI", nrOfPagesWithNoWiki )
- );
-
- System.out.println("longes Page:"+len_longestPage);
- System.out.println("average length:"+len_allPages/nrOfAnalyzedPages );
- }
-}
diff --git a/de.tudarmstadt.ukp.wikipedia.parser/src/test/resources/log4j.properties b/de.tudarmstadt.ukp.wikipedia.parser/src/test/resources/log4j.properties
deleted file mode 100644
index f80f73b3a..000000000
--- a/de.tudarmstadt.ukp.wikipedia.parser/src/test/resources/log4j.properties
+++ /dev/null
@@ -1,72 +0,0 @@
-# Licensed to the Technische Universität Darmstadt under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The Technische Universität Darmstadt
-# licenses this file to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-### direct log messages to stdout ###
-log4j.appender.stdout=org.apache.log4j.ConsoleAppender
-log4j.appender.stdout.Target=System.out
-log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
-log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n
-
-### direct messages to file console.log ###
-log4j.appender.file=org.apache.log4j.FileAppender
-log4j.appender.file.File=console.log
-log4j.appender.file.layout=org.apache.log4j.PatternLayout
-log4j.appender.file.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n
-
-### set log levels - for more verbose logging change 'info' to 'debug' ###
-
-#log4j.rootLogger=info, file, stdout
-log4j.rootLogger=info, stdout
-
-log4j.logger.de.tudarmstadt.ukp.wikipedia.api=warn
-log4j.logger.de.tudarmstadt.ukp.wikipedia.util=warn
-
-log4j.logger.org.hibernate=warn
-log4j.logger.org.hibernate.cfg=info
-#log4j.logger.org.hibernate=info
-#log4j.logger.org.hibernate=debug
-
-### log HQL query parser activity
-#log4j.logger.org.hibernate.hql.ast.AST=debug
-
-### log just the SQL
-#log4j.logger.org.hibernate.SQL=debug
-
-### log JDBC bind parameters ###
-log4j.logger.org.hibernate.type=info
-#log4j.logger.org.hibernate.type=debug
-
-### log schema export/update ###
-log4j.logger.org.hibernate.tool.hbm2ddl=debug
-
-### log HQL parse trees
-#log4j.logger.org.hibernate.hql=debug
-
-### log cache activity ###
-#log4j.logger.org.hibernate.cache=debug
-
-### log transaction activity
-#log4j.logger.org.hibernate.transaction=debug
-
-### log JDBC resource acquisition
-#log4j.logger.org.hibernate.jdbc=debug
-
-### log only errors of internal JDBC connection provider
-log4j.logger.org.hibernate.engine.jdbc.connections.internal=error
-
-### enable the following line if you want to track down connection ###
-### leakages when using DriverManagerConnectionProvider ###
-#log4j.logger.org.hibernate.connection.DriverManagerConnectionProvider=trace
diff --git a/de.tudarmstadt.ukp.wikipedia.parser/src/test/resources/log4j2.xml b/de.tudarmstadt.ukp.wikipedia.parser/src/test/resources/log4j2.xml
new file mode 100644
index 000000000..73cd3e041
--- /dev/null
+++ b/de.tudarmstadt.ukp.wikipedia.parser/src/test/resources/log4j2.xml
@@ -0,0 +1,31 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/de.tudarmstadt.ukp.wikipedia.revisionmachine/pom.xml b/de.tudarmstadt.ukp.wikipedia.revisionmachine/pom.xml
index 2fcb845ed..a85025724 100644
--- a/de.tudarmstadt.ukp.wikipedia.revisionmachine/pom.xml
+++ b/de.tudarmstadt.ukp.wikipedia.revisionmachine/pom.xml
@@ -20,7 +20,7 @@
de.tudarmstadt.ukp.wikipedia
de.tudarmstadt.ukp.wikipedia
- 1.2.0-SNAPSHOT
+ 2.0.0-SNAPSHOT
4.0.0
de.tudarmstadt.ukp.wikipedia.revisionmachine
@@ -34,10 +34,10 @@
commons-codec
commons-codec