From 9279d268a967f17c790bc190f43b0d10cb1ec766 Mon Sep 17 00:00:00 2001 From: xpqiu Date: Sun, 16 Nov 2014 09:39:41 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B02.1=E7=89=88=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fnlp-app/pom.xml | 6 +- fnlp-core/.classpath | 1 - fnlp-core/.project | 13 ---- fnlp-core/pom.xml | 4 +- .../ml/classifier/AbstractClassifier.java | 1 + .../org/fnlp/ml/classifier/linear/Linear.java | 8 +-- .../ml/classifier/linear/OnlineTrainer.java | 63 ++++++++--------- .../java/org/fnlp/nlp/cn/PartOfSpeech.java | 25 ++++--- .../fnlp/nlp/cn/anaphora/EntitiesGetter.java | 4 +- .../nlp/cn/anaphora/train/ARClassifier.java | 2 +- .../parser/dep/train/JointParerTrainer.java | 4 +- .../nlp/parser/dep/train/ParserTrainer.java | 4 +- .../java/org/fnlp/nlp/tag/AbstractTagger.java | 4 +- fnlp-demo/pom.xml | 8 +-- .../org/fnlp/demo/ml/SequenceLabeling.java | 2 +- .../org/fnlp/demo/ml/SimpleClassifier2.java | 4 +- .../src/test/java/org/fnlp/demo/MLTest.java | 33 +++++++++ .../src/test/java/org/fnlp/demo/NLPTest.java | 42 +++++++++++ fnlp-dev/pom.xml | 6 +- fnlp-train/.classpath | 8 +-- .../org.eclipse.core.resources.prefs | 1 - fnlp-train/pom.xml | 6 +- .../java/org/fnlp/train/tag/addedTagger.java | 2 +- .../java/org/fnlp/nlp/cn/rl/RLSegTest.java | 69 ------------------- pom.xml | 2 +- 25 files changed, 157 insertions(+), 165 deletions(-) create mode 100644 fnlp-demo/src/test/java/org/fnlp/demo/MLTest.java create mode 100644 fnlp-demo/src/test/java/org/fnlp/demo/NLPTest.java delete mode 100644 fnlp-train/src/test/java/org/fnlp/nlp/cn/rl/RLSegTest.java diff --git a/fnlp-app/pom.xml b/fnlp-app/pom.xml index a55dd63..4854d79 100644 --- a/fnlp-app/pom.xml +++ b/fnlp-app/pom.xml @@ -4,11 +4,11 @@ org.fnlp fnlp-all - 2.0-SNAPSHOT + 2.1-SNAPSHOT org.fnlp fnlp-app - 2.0-SNAPSHOT + 2.1-SNAPSHOT fnlp-app http://maven.apache.org @@ -24,7 +24,7 @@ org.fnlp fnlp-core - 2.0-SNAPSHOT + 2.1-SNAPSHOT org.apache.lucene diff --git a/fnlp-core/.classpath b/fnlp-core/.classpath index b46ac09..c9464c0 100644 --- a/fnlp-core/.classpath +++ b/fnlp-core/.classpath @@ -20,7 +20,6 @@ - diff --git a/fnlp-core/.project b/fnlp-core/.project index 1dde3c2..e999988 100644 --- a/fnlp-core/.project +++ b/fnlp-core/.project @@ -5,11 +5,6 @@ - - org.eclipse.wst.common.project.facet.core.builder - - - org.eclipse.jdt.core.javabuilder @@ -20,17 +15,9 @@ - - org.eclipse.wst.validation.validationbuilder - - - - org.eclipse.jem.workbench.JavaEMFNature - org.eclipse.wst.common.modulecore.ModuleCoreNature org.eclipse.m2e.core.maven2Nature org.eclipse.jdt.core.javanature - org.eclipse.wst.common.project.facet.core.nature diff --git a/fnlp-core/pom.xml b/fnlp-core/pom.xml index fa9f5da..0da2d3b 100644 --- a/fnlp-core/pom.xml +++ b/fnlp-core/pom.xml @@ -4,12 +4,12 @@ org.fnlp fnlp-all - 2.0-SNAPSHOT + 2.1-SNAPSHOT ../pom.xml org.fnlp fnlp-core - 2.0-SNAPSHOT + 2.1-SNAPSHOT fnlp-core https://github.com/xpqiu/fnlp/ FNLP is developed for Chinese natural language processing (NLP), which also includes some machine learning algorithms and data sets to achieve the NLP tasks. FNLP is distributed under LGPL3.0. diff --git a/fnlp-core/src/main/java/org/fnlp/ml/classifier/AbstractClassifier.java b/fnlp-core/src/main/java/org/fnlp/ml/classifier/AbstractClassifier.java index 04b8b36..9fc4d0c 100644 --- a/fnlp-core/src/main/java/org/fnlp/ml/classifier/AbstractClassifier.java +++ b/fnlp-core/src/main/java/org/fnlp/ml/classifier/AbstractClassifier.java @@ -36,6 +36,7 @@ public abstract class AbstractClassifier implements Serializable{ private static final long serialVersionUID = -175929257288466023L; + protected AlphabetFactory factory; diff --git a/fnlp-core/src/main/java/org/fnlp/ml/classifier/linear/Linear.java b/fnlp-core/src/main/java/org/fnlp/ml/classifier/linear/Linear.java index e887a3f..cf68e73 100644 --- a/fnlp-core/src/main/java/org/fnlp/ml/classifier/linear/Linear.java +++ b/fnlp-core/src/main/java/org/fnlp/ml/classifier/linear/Linear.java @@ -51,9 +51,7 @@ public class Linear extends AbstractClassifier implements Serializable { private static final long serialVersionUID = -2626247109469506636L; - protected Inferencer inferencer; - - protected AlphabetFactory factory; + protected Inferencer inferencer; protected Pipe pipe; @@ -145,8 +143,6 @@ public Pipe getPipe() { return pipe; } - public AlphabetFactory getAlphabetFactory() { - return factory; - } + } \ No newline at end of file diff --git a/fnlp-core/src/main/java/org/fnlp/ml/classifier/linear/OnlineTrainer.java b/fnlp-core/src/main/java/org/fnlp/ml/classifier/linear/OnlineTrainer.java index 34e648a..8597ae7 100644 --- a/fnlp-core/src/main/java/org/fnlp/ml/classifier/linear/OnlineTrainer.java +++ b/fnlp-core/src/main/java/org/fnlp/ml/classifier/linear/OnlineTrainer.java @@ -40,7 +40,7 @@ /** * 在线参数训练类, * 可能问题:收敛控制,参数c设置过小,可能会导致“假收敛”的情况 2012.8.6 - * + * @author xpqiu */ public class OnlineTrainer extends AbstractTrainer { @@ -61,7 +61,6 @@ public class OnlineTrainer extends AbstractTrainer { public boolean interim = false; public float c=0.1f; - public float threshold = 0.99f; protected Linear classifier; @@ -74,31 +73,32 @@ public class OnlineTrainer extends AbstractTrainer { protected float[] weights; AlphabetFactory af; - public OnlineTrainer(AlphabetFactory af, int iternum) { - //默认特征生成器 - Generator gen = new SFGenerator(); - //默认推理器 - this.inferencer = new LinearMax(gen, af.getLabelSize()); - //默认损失函数 - this.loss = new ZeroOneLoss(); - //默认参数更新策略 - this.update = new LinearMaxPAUpdate(loss); - this.iternum = iternum; - this.c = 0.1f; - this.af = af; - weights = (float[]) inferencer.getWeights(); - if (weights == null) { - weights = new float[af.getFeatureSize()]; - inferencer.setWeights(weights); - } - random = new Random(1l); - } + /** * 构造函数 * @param af 字典 */ public OnlineTrainer(AlphabetFactory af) { this(af,50); + } + + public OnlineTrainer(AlphabetFactory af, int iternum) { + //默认特征生成器 + Generator gen = new SFGenerator(); + //默认推理器 + this.inferencer = new LinearMax(gen, af.getLabelSize()); + //默认损失函数 + this.loss = new ZeroOneLoss(); + //默认参数更新策略 + this.update = new LinearMaxPAUpdate(loss); + this.iternum = iternum; + this.af = af; + weights = (float[]) inferencer.getWeights(); + if (weights == null) { + weights = new float[af.getFeatureSize()]; + inferencer.setWeights(weights); + } + random = new Random(1l); } /** @@ -106,23 +106,24 @@ public OnlineTrainer(AlphabetFactory af) { * @param inferencer 推理算法 * @param update 参数更新方法 * @param loss 损失计算方法 - * @param fsize 特征数量 + * @param af 特征标签词典 * @param iternum 最大迭代次数 * @param c 步长阈值 */ public OnlineTrainer(Inferencer inferencer, Update update, - Loss loss, int fsize, int iternum, float c) { + Loss loss, AlphabetFactory af, int iternum, float c) { this.inferencer = inferencer; this.update = update; this.loss = loss; this.iternum = iternum; - this.c = c; + this.c = c; + this.af =af; weights = (float[]) inferencer.getWeights(); if (weights == null) { - weights = new float[fsize]; + weights = new float[af.getFeatureSize()]; inferencer.setWeights(weights); - }else if(weights.length org.fnlp fnlp-all - 2.0-SNAPSHOT + 2.1-SNAPSHOT org.fnlp fnlp-demo - 2.0-SNAPSHOT + 2.1-SNAPSHOT fnlp-demo http://maven.apache.org @@ -24,12 +24,12 @@ org.fnlp fnlp-core - 2.0-SNAPSHOT + 2.1-SNAPSHOT org.fnlp fnlp-app - 2.0-SNAPSHOT + 2.1-SNAPSHOT diff --git a/fnlp-demo/src/main/java/org/fnlp/demo/ml/SequenceLabeling.java b/fnlp-demo/src/main/java/org/fnlp/demo/ml/SequenceLabeling.java index b52204b..d48e86b 100644 --- a/fnlp-demo/src/main/java/org/fnlp/demo/ml/SequenceLabeling.java +++ b/fnlp-demo/src/main/java/org/fnlp/demo/ml/SequenceLabeling.java @@ -104,7 +104,7 @@ public static void main(String[] args) throws Exception { OnlineTrainer trainer = new OnlineTrainer(inference, update, loss, - features.size(), 50,0.1f); + factory, 50,0.1f); Linear cl = trainer.train(trainSet); diff --git a/fnlp-demo/src/main/java/org/fnlp/demo/ml/SimpleClassifier2.java b/fnlp-demo/src/main/java/org/fnlp/demo/ml/SimpleClassifier2.java index 448b9bf..3ddbaa2 100644 --- a/fnlp-demo/src/main/java/org/fnlp/demo/ml/SimpleClassifier2.java +++ b/fnlp-demo/src/main/java/org/fnlp/demo/ml/SimpleClassifier2.java @@ -59,7 +59,7 @@ public static void main(String[] args) throws Exception { long start = System.currentTimeMillis(); - path = "./example-data/data-classification.txt"; + path = "../example-data/data-classification.txt"; Pipe lpipe = new Target2Label(al); Pipe fpipe = new StringArray2IndexArray(factory, true); @@ -90,7 +90,7 @@ public static void main(String[] args) throws Exception { Inferencer msolver = new LinearMax(featureGen, al.size() ); - OnlineTrainer trainer = new OnlineTrainer(msolver, update, loss, af.size(), round, + OnlineTrainer trainer = new OnlineTrainer(msolver, update, loss, factory, round, c); Linear classify = trainer.train(train, test); diff --git a/fnlp-demo/src/test/java/org/fnlp/demo/MLTest.java b/fnlp-demo/src/test/java/org/fnlp/demo/MLTest.java new file mode 100644 index 0000000..e86a8fa --- /dev/null +++ b/fnlp-demo/src/test/java/org/fnlp/demo/MLTest.java @@ -0,0 +1,33 @@ +package org.fnlp.demo; + +import static org.junit.Assert.*; + +import org.fnlp.demo.ml.HierClassifierUsage1; +import org.fnlp.demo.ml.HierClassifierUsage2; +import org.fnlp.demo.ml.SequenceLabeling; +import org.fnlp.demo.ml.SimpleClassifier2; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +public class MLTest { + + @BeforeClass + public static void setUpBeforeClass() throws Exception { + } + + @AfterClass + public static void tearDownAfterClass() throws Exception { + } + + @Test + public void test() throws Exception { + + SequenceLabeling.main(null); + SimpleClassifier2.main(null); + HierClassifierUsage1.main(null); + HierClassifierUsage2.main(null); + + } + +} diff --git a/fnlp-demo/src/test/java/org/fnlp/demo/NLPTest.java b/fnlp-demo/src/test/java/org/fnlp/demo/NLPTest.java new file mode 100644 index 0000000..641dbf9 --- /dev/null +++ b/fnlp-demo/src/test/java/org/fnlp/demo/NLPTest.java @@ -0,0 +1,42 @@ +package org.fnlp.demo; + +import static org.junit.Assert.*; + +import org.fnlp.demo.ml.HierClassifierUsage1; +import org.fnlp.demo.ml.HierClassifierUsage2; +import org.fnlp.demo.ml.SequenceLabeling; +import org.fnlp.demo.ml.SimpleClassifier2; +import org.fnlp.demo.nlp.AnaphoraResolution; +import org.fnlp.demo.nlp.ChineseWordSegmentation; +import org.fnlp.demo.nlp.DepParser; +import org.fnlp.demo.nlp.KeyWordExtraction; +import org.fnlp.demo.nlp.NamedEntityRecognition; +import org.fnlp.demo.nlp.PartsOfSpeechTag; +import org.fnlp.demo.nlp.TimeExpressionRecognition; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +public class NLPTest { + + @BeforeClass + public static void setUpBeforeClass() throws Exception { + } + + @AfterClass + public static void tearDownAfterClass() throws Exception { + } + + @Test + public void test() throws Exception { + ChineseWordSegmentation.main(null); + PartsOfSpeechTag.main(null); + DepParser.main(null); + KeyWordExtraction.main(null); + NamedEntityRecognition.main(null); + TimeExpressionRecognition.main(null); +// AnaphoraResolution.main(null); + + } + +} diff --git a/fnlp-dev/pom.xml b/fnlp-dev/pom.xml index def9314..906f3b9 100644 --- a/fnlp-dev/pom.xml +++ b/fnlp-dev/pom.xml @@ -4,11 +4,11 @@ org.fnlp fnlp-all - 2.0-SNAPSHOT + 2.1-SNAPSHOT org.fnlp fnlp-dev - 2.0-SNAPSHOT + 2.1-SNAPSHOT fnlp-dev http://maven.apache.org @@ -24,7 +24,7 @@ org.fnlp fnlp-core - 2.0-SNAPSHOT + 2.1-SNAPSHOT diff --git a/fnlp-train/.classpath b/fnlp-train/.classpath index c9464c0..f94f9d9 100644 --- a/fnlp-train/.classpath +++ b/fnlp-train/.classpath @@ -6,19 +6,19 @@ - + - - + - + + diff --git a/fnlp-train/.settings/org.eclipse.core.resources.prefs b/fnlp-train/.settings/org.eclipse.core.resources.prefs index f9fe345..e9441bb 100644 --- a/fnlp-train/.settings/org.eclipse.core.resources.prefs +++ b/fnlp-train/.settings/org.eclipse.core.resources.prefs @@ -1,4 +1,3 @@ eclipse.preferences.version=1 encoding//src/main/java=UTF-8 -encoding//src/test/java=UTF-8 encoding/=UTF-8 diff --git a/fnlp-train/pom.xml b/fnlp-train/pom.xml index d663a89..566eb05 100644 --- a/fnlp-train/pom.xml +++ b/fnlp-train/pom.xml @@ -4,11 +4,11 @@ org.fnlp fnlp-all - 2.0-SNAPSHOT + 2.1-SNAPSHOT org.fnlp fnlp-train - 2.0-SNAPSHOT + 2.1-SNAPSHOT fnlp-train http://maven.apache.org @@ -24,7 +24,7 @@ org.fnlp fnlp-core - 2.0-SNAPSHOT + 2.1-SNAPSHOT diff --git a/fnlp-train/src/main/java/org/fnlp/train/tag/addedTagger.java b/fnlp-train/src/main/java/org/fnlp/train/tag/addedTagger.java index c84b878..cd9d4ad 100644 --- a/fnlp-train/src/main/java/org/fnlp/train/tag/addedTagger.java +++ b/fnlp-train/src/main/java/org/fnlp/train/tag/addedTagger.java @@ -311,7 +311,7 @@ public void train() throws Exception { } OnlineTrainer trainer = new OnlineTrainer(inference, update, loss, - features.size(), iterNum, c1); + factory, iterNum, c1); trainer.innerOptimized = false; trainer.finalOptimized = true; diff --git a/fnlp-train/src/test/java/org/fnlp/nlp/cn/rl/RLSegTest.java b/fnlp-train/src/test/java/org/fnlp/nlp/cn/rl/RLSegTest.java deleted file mode 100644 index 9063e7a..0000000 --- a/fnlp-train/src/test/java/org/fnlp/nlp/cn/rl/RLSegTest.java +++ /dev/null @@ -1,69 +0,0 @@ -/** -* This file is part of FNLP (formerly FudanNLP). -* -* FNLP is free software: you can redistribute it and/or modify -* it under the terms of the GNU Lesser General Public License as published by -* the Free Software Foundation, either version 3 of the License, or -* (at your option) any later version. -* -* FNLP is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Lesser General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with FudanNLP. If not, see . -* -* Copyright 2009-2014 www.fnlp.org. All rights reserved. -*/ - -package org.fnlp.nlp.cn.rl; - -import static org.junit.Assert.*; - -import java.io.IOException; - -import org.fnlp.nlp.cn.tag.CWSTagger; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; - -import gnu.trove.set.hash.THashSet; - -public class RLSegTest { - RLSeg rlseg; - - @BeforeClass - public static void setUpBeforeClass() throws Exception { - } - - @AfterClass - public static void tearDownAfterClass() throws Exception { - } - - @Before - public void setUp() throws Exception { - CWSTagger tag = new CWSTagger("../models/seg.m"); - - rlseg = new RLSeg(tag,"../tmp/FNLPDATA/all.dict"); - } - - @After - public void tearDown() throws Exception { - rlseg.close(); - } - - @Test - public void testGetNewWords() throws IOException { -// THashSet newset = new THashSet(); -// THashSet set; -//// set = rlseg.getNewWords("考几"); -//// set = rlseg.getNewWords("抛诸脑后"); -// set = rlseg.getNewWords("买iphone"); -// System.out.println(set); -// - } - -} \ No newline at end of file diff --git a/pom.xml b/pom.xml index 2fca376..54a489b 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ org.fnlp fnlp-all - 2.0-SNAPSHOT + 2.1-SNAPSHOT pom