Skip to content

Commit

Permalink
更新2.1版本
Browse files Browse the repository at this point in the history
  • Loading branch information
xpqiu committed Nov 16, 2014
1 parent f3c6c64 commit 9279d26
Show file tree
Hide file tree
Showing 25 changed files with 157 additions and 165 deletions.
6 changes: 3 additions & 3 deletions fnlp-app/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
<parent>
<groupId>org.fnlp</groupId>
<artifactId>fnlp-all</artifactId>
<version>2.0-SNAPSHOT</version>
<version>2.1-SNAPSHOT</version>
</parent>
<groupId>org.fnlp</groupId>
<artifactId>fnlp-app</artifactId>
<version>2.0-SNAPSHOT</version>
<version>2.1-SNAPSHOT</version>
<name>fnlp-app</name>
<url>http://maven.apache.org</url>
<properties>
Expand All @@ -24,7 +24,7 @@
<dependency>
<groupId>org.fnlp</groupId>
<artifactId>fnlp-core</artifactId>
<version>2.0-SNAPSHOT</version>
<version>2.1-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
Expand Down
1 change: 0 additions & 1 deletion fnlp-core/.classpath
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
<attribute name="org.eclipse.jst.component.nondependency" value=""/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/classes"/>
Expand Down
13 changes: 0 additions & 13 deletions fnlp-core/.project
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,6 @@
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.wst.common.project.facet.core.builder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
Expand All @@ -20,17 +15,9 @@
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.wst.validation.validationbuilder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jem.workbench.JavaEMFNature</nature>
<nature>org.eclipse.wst.common.modulecore.ModuleCoreNature</nature>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.eclipse.wst.common.project.facet.core.nature</nature>
</natures>
</projectDescription>
4 changes: 2 additions & 2 deletions fnlp-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
<parent>
<groupId>org.fnlp</groupId>
<artifactId>fnlp-all</artifactId>
<version>2.0-SNAPSHOT</version>
<version>2.1-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<groupId>org.fnlp</groupId>
<artifactId>fnlp-core</artifactId>
<version>2.0-SNAPSHOT</version>
<version>2.1-SNAPSHOT</version>
<name>fnlp-core</name>
<url>https://github.com/xpqiu/fnlp/</url>
<description>FNLP is developed for Chinese natural language processing (NLP), which also includes some machine learning algorithms and data sets to achieve the NLP tasks. FNLP is distributed under LGPL3.0.</description>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
public abstract class AbstractClassifier implements Serializable{

private static final long serialVersionUID = -175929257288466023L;

protected AlphabetFactory factory;


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,7 @@ public class Linear extends AbstractClassifier implements Serializable {

private static final long serialVersionUID = -2626247109469506636L;

protected Inferencer inferencer;

protected AlphabetFactory factory;
protected Inferencer inferencer;

protected Pipe pipe;

Expand Down Expand Up @@ -145,8 +143,6 @@ public Pipe getPipe() {
return pipe;
}

/**
 * Returns the alphabet factory held in the {@code factory} field.
 *
 * @return the value of {@code factory}
 */
public AlphabetFactory getAlphabetFactory() {
return factory;
}


}
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
/**
* 在线参数训练类,
* 可能问题:收敛控制,参数c设置过小,可能会导致“假收敛”的情况 2012.8.6
*
* @author xpqiu
*/
public class OnlineTrainer extends AbstractTrainer {

Expand All @@ -61,7 +61,6 @@ public class OnlineTrainer extends AbstractTrainer {
public boolean interim = false;

public float c=0.1f;

public float threshold = 0.99f;

protected Linear classifier;
Expand All @@ -74,55 +73,57 @@ public class OnlineTrainer extends AbstractTrainer {
protected float[] weights;
AlphabetFactory af;

/**
 * Builds a trainer from default components: an {@code SFGenerator} feature
 * generator, a {@code LinearMax} inferencer, {@code ZeroOneLoss} and a
 * {@code LinearMaxPAUpdate} update rule. The {@code c} field is set to 0.1.
 *
 * @param af alphabet factory; its label size configures the inferencer and its
 *           feature size is the length of a newly allocated weight array
 * @param iternum value stored in the {@code iternum} field
 *           (NOTE(review): presumably the maximum number of training iterations; confirm against train())
 */
public OnlineTrainer(AlphabetFactory af, int iternum) {
// default feature generator
Generator gen = new SFGenerator();
// default inferencer
this.inferencer = new LinearMax(gen, af.getLabelSize());
// default loss function
this.loss = new ZeroOneLoss();
// default parameter-update strategy
this.update = new LinearMaxPAUpdate(loss);
this.iternum = iternum;
this.c = 0.1f;
this.af = af;
// reuse the inferencer's weights when it already has some; otherwise allocate
// an array of af.getFeatureSize() floats and hand it to the inferencer
weights = (float[]) inferencer.getWeights();
if (weights == null) {
weights = new float[af.getFeatureSize()];
inferencer.setWeights(weights);
}
// fixed seed: the long literal 1 (written with a lowercase 'l' suffix)
random = new Random(1l);
}

/**
 * Constructor; delegates to {@code OnlineTrainer(AlphabetFactory, int)} with
 * an {@code iternum} of 50.
 * @param af the alphabet (dictionary) factory
 */
public OnlineTrainer(AlphabetFactory af) {
this(af,50);
}

/**
 * Builds a trainer from default components: an {@code SFGenerator} feature
 * generator, a {@code LinearMax} inferencer, {@code ZeroOneLoss} and a
 * {@code LinearMaxPAUpdate} update rule. The {@code c} field is not assigned
 * here.
 *
 * @param af alphabet factory; its label size configures the inferencer and its
 *           feature size is the length of a newly allocated weight array
 * @param iternum value stored in the {@code iternum} field
 *           (NOTE(review): presumably the maximum number of training iterations; confirm against train())
 */
public OnlineTrainer(AlphabetFactory af, int iternum) {
// default feature generator
Generator gen = new SFGenerator();
// default inferencer
this.inferencer = new LinearMax(gen, af.getLabelSize());
// default loss function
this.loss = new ZeroOneLoss();
// default parameter-update strategy
this.update = new LinearMaxPAUpdate(loss);
this.iternum = iternum;
this.af = af;
// reuse the inferencer's weights when it already has some; otherwise allocate
// an array of af.getFeatureSize() floats and hand it to the inferencer
weights = (float[]) inferencer.getWeights();
if (weights == null) {
weights = new float[af.getFeatureSize()];
inferencer.setWeights(weights);
}
// fixed seed: the long literal 1 (written with a lowercase 'l' suffix)
random = new Random(1l);
}

/**
* 构造函数
* @param inferencer 推理算法
* @param update 参数更新方法
* @param loss 损失计算方法
* @param fsize 特征数量
* @param af 特征标签词典
* @param iternum 最大迭代次数
* @param c 步长阈值
*/
public OnlineTrainer(Inferencer inferencer, Update update,
Loss loss, int fsize, int iternum, float c) {
Loss loss, AlphabetFactory af, int iternum, float c) {
this.inferencer = inferencer;
this.update = update;
this.loss = loss;
this.iternum = iternum;
this.c = c;
this.c = c;
this.af =af;
weights = (float[]) inferencer.getWeights();
if (weights == null) {
weights = new float[fsize];
weights = new float[af.getFeatureSize()];
inferencer.setWeights(weights);
}else if(weights.length<fsize){
weights = Arrays.copyOf(weights, fsize);
}else if(weights.length<af.getFeatureSize()){
weights = Arrays.copyOf(weights, af.getFeatureSize());
inferencer.setWeights(weights);
}
random = new Random(1l);
Expand All @@ -133,12 +134,12 @@ public OnlineTrainer(Inferencer inferencer, Update update,
* @param classifier 分类器
* @param update 参数更新方法
* @param loss 损失计算方法
* @param fsize 特征数量
* @param af 特征标签词典
* @param iternum 最大迭代次数
* @param c 步长阈值
*/
public OnlineTrainer(Linear classifier, Update update, Loss loss, int fsize, int iternum, float c) {
this(classifier.getInferencer(), update, loss, fsize, iternum, c);
public OnlineTrainer(Linear classifier, Update update, Loss loss, AlphabetFactory af, int iternum, float c) {
this(classifier.getInferencer(), update, loss, af, iternum, c);
}

/**
Expand Down Expand Up @@ -252,7 +253,7 @@ public Linear train(InstanceSet trainset, InstanceSet devset) {
System.out.println();

if (interim) {
Linear p = new Linear(inferencer, trainset.getAlphabetFactory());
Linear p = new Linear(inferencer, af);
try {
p.saveTo("tmp.model");
} catch (IOException e) {
Expand Down
25 changes: 16 additions & 9 deletions fnlp-core/src/main/java/org/fnlp/nlp/cn/PartOfSpeech.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ public enum PartOfSpeech {
专有名,
人名,
地名,
机构名,
机构名,
实体名,
型号名,
事件名,
网址,
Expand All @@ -41,7 +42,7 @@ public enum PartOfSpeech {

代词,
人称代词,
指示代词,
指示词,
疑问代词,


Expand All @@ -58,8 +59,8 @@ public enum PartOfSpeech {


动词,
能愿动词,
趋向动词,
情态词,
趋向词,
被动词,
把动词,

Expand Down Expand Up @@ -104,7 +105,7 @@ public static PartOfSpeech[] valueOf(String[] pos) {
static{
Pronoun.add(代词);
Pronoun.add(人称代词);
Pronoun.add(指示代词);
Pronoun.add(指示词);
Pronoun.add(疑问代词);
}
/**
Expand All @@ -129,7 +130,8 @@ public boolean isMark() {
entities.add(人名);
entities.add(地名);
entities.add(机构名);
entities.add(专有名);
entities.add(专有名);
entities.add(实体名);
}

/**
Expand All @@ -141,13 +143,18 @@ public boolean isEntiry() {
return entities.contains(this);
}
/**
* 判断词性是否为一个实体,包括:人名|地名|机构名|专有名。
* 判断词性是否为一个实体,包括:人名|地名|机构名|专有名|实体名
* @param pos
* @return
*/
public static boolean isEntiry(String pos) {
PartOfSpeech p = valueOf(pos);

PartOfSpeech p;
try {
p = valueOf(pos);
} catch (Exception e) {
System.err.println(pos+"不存在");
return false;
}
return p.isEntiry();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public class EntitiesGetter {
NP.add(PartOfSpeech.限定词);
NP.add(PartOfSpeech.名词);
NP.add(PartOfSpeech.代词);
NP.add(PartOfSpeech.指示代词);
NP.add(PartOfSpeech.指示词);
NP.add(PartOfSpeech.人称代词);
NP.add(PartOfSpeech.疑问代词);

Expand Down Expand Up @@ -218,7 +218,7 @@ else if(this.isMale(ey.getData())){
NN.add(PartOfSpeech.机构名);
NN.add(PartOfSpeech.代词);
NN.add(PartOfSpeech.人称代词);
NN.add(PartOfSpeech.指示代词);
NN.add(PartOfSpeech.指示词);
NN.add(PartOfSpeech.疑问代词);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ public void train() throws Exception {
ZeroOneLoss l = new ZeroOneLoss();
Inferencer ms = new LinearMax(gen, factory.getLabelSize());
Update update = new LinearMaxPAUpdate(l);
OnlineTrainer trainer = new OnlineTrainer(ms, update,l, factory.getFeatureSize(), 50,0.005f);
OnlineTrainer trainer = new OnlineTrainer(ms, update,l, factory, 50,0.005f);
Linear pclassifier = trainer.train(instset,instset);
pipe.removeTargetPipe();
pclassifier.setPipe(pipe);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,10 +195,8 @@ private InstanceSet buildInstanceList(String file) throws IOException {
public void train(String dataFile, int maxite, float c) throws IOException {

InstanceSet instset = buildInstanceList(dataFile);
IFeatureAlphabet features = factory.DefaultFeatureAlphabet();

SFGenerator generator = new SFGenerator();
int fsize = features.size();

LabelAlphabet la = factory.DefaultLabelAlphabet();
int ysize = la.size();
Expand All @@ -207,7 +205,7 @@ public void train(String dataFile, int maxite, float c) throws IOException {
ZeroOneLoss loss = new ZeroOneLoss();
Update update = new LinearMaxPAUpdate(loss);
OnlineTrainer trainer = new OnlineTrainer(solver, update, loss,
fsize, maxite, c);
factory, maxite, c);
Linear models = trainer.train(instset, null);
instset = null;
solver = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,11 +191,9 @@ public void train(String dataFile, int maxite, float c) throws IOException {

LabelAlphabet postagAlphabet = factory.buildLabelAlphabet("postag");

IFeatureAlphabet features = factory.DefaultFeatureAlphabet();

SFGenerator generator = new SFGenerator();
Linear[] models = new Linear[postagAlphabet.size()];
int fsize = features.size();

for (int i = 0; i < postagAlphabet.size(); i++) {
String pos = postagAlphabet.lookupString(i);
Expand All @@ -208,7 +206,7 @@ public void train(String dataFile, int maxite, float c) throws IOException {
ZeroOneLoss loss = new ZeroOneLoss();
Update update = new LinearMaxPAUpdate(loss);
OnlineTrainer trainer = new OnlineTrainer(solver, update, loss,
fsize, maxite, c);
factory, maxite, c);
models[i] = trainer.train(instset, null);
instset = null;
solver = null;
Expand Down
4 changes: 2 additions & 2 deletions fnlp-core/src/main/java/org/fnlp/nlp/tag/AbstractTagger.java
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,10 @@ public void train() throws Exception {
OnlineTrainer trainer;

if(cl!=null){
trainer = new OnlineTrainer(cl, update, loss, features.size(),iterNum, c);
trainer = new OnlineTrainer(cl, update, loss, factory,iterNum, c);
}else{
trainer = new OnlineTrainer(inference, update, loss,
features.size(), iterNum, c);
factory, iterNum, c);
}

cl = trainer.train(trainSet, testSet);
Expand Down
Loading

0 comments on commit 9279d26

Please sign in to comment.