Skip to content

Commit

Permalink
Merge pull request #66 from severinsimmler/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
severinsimmler authored Dec 23, 2021
2 parents 15e81a6 + 5b3ec1f commit 3a2626b
Show file tree
Hide file tree
Showing 66 changed files with 2,957 additions and 447 deletions.
61 changes: 31 additions & 30 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,39 +3,40 @@
from setuptools import Extension

SOURCES = [
"chaine/core/crf.cpp",
"chaine/core/crfsuite/lib/cqdb/src/cqdb.c",
"chaine/core/crfsuite/lib/cqdb/src/lookup3.c",
"chaine/core/crfsuite/lib/crf/src/crf1d_context.c",
"chaine/core/crfsuite/lib/crf/src/crf1d_encode.c",
"chaine/core/crfsuite/lib/crf/src/crf1d_feature.c",
"chaine/core/crfsuite/lib/crf/src/crf1d_model.c",
"chaine/core/crfsuite/lib/crf/src/crf1d_tag.c",
"chaine/core/crfsuite/lib/crf/src/crfsuite.c",
"chaine/core/crfsuite/lib/crf/src/crfsuite_train.c",
"chaine/core/crfsuite/lib/crf/src/dataset.c",
"chaine/core/crfsuite/lib/crf/src/dictionary.c",
"chaine/core/crfsuite/lib/crf/src/holdout.c",
"chaine/core/crfsuite/lib/crf/src/logging.c",
"chaine/core/crfsuite/lib/crf/src/params.c",
"chaine/core/crfsuite/lib/crf/src/quark.c",
"chaine/core/crfsuite/lib/crf/src/rumavl.c",
"chaine/core/crfsuite/lib/crf/src/train_arow.c",
"chaine/core/crfsuite/lib/crf/src/train_averaged_perceptron.c",
"chaine/core/crfsuite/lib/crf/src/train_l2sgd.c",
"chaine/core/crfsuite/lib/crf/src/train_lbfgs.c",
"chaine/core/crfsuite/lib/crf/src/train_passive_aggressive.c",
"chaine/core/crfsuite/swig/crfsuite.cpp",
"chaine/core/liblbfgs/lib/lbfgs.c",
"chaine/core/trainer_wrapper.cpp",
"chaine/_core/crf.cpp",
"chaine/_core/crfsuite/lib/cqdb/src/cqdb.c",
"chaine/_core/crfsuite/lib/cqdb/src/lookup3.c",
"chaine/_core/crfsuite/lib/crf/src/crf1d_context.c",
"chaine/_core/crfsuite/lib/crf/src/crf1d_encode.c",
"chaine/_core/crfsuite/lib/crf/src/crf1d_feature.c",
"chaine/_core/crfsuite/lib/crf/src/crf1d_model.c",
"chaine/_core/crfsuite/lib/crf/src/crf1d_tag.c",
"chaine/_core/crfsuite/lib/crf/src/crfsuite.c",
"chaine/_core/crfsuite/lib/crf/src/crfsuite_train.c",
"chaine/_core/crfsuite/lib/crf/src/dataset.c",
"chaine/_core/crfsuite/lib/crf/src/dictionary.c",
"chaine/_core/crfsuite/lib/crf/src/holdout.c",
"chaine/_core/crfsuite/lib/crf/src/json.c",
"chaine/_core/crfsuite/lib/crf/src/logging.c",
"chaine/_core/crfsuite/lib/crf/src/params.c",
"chaine/_core/crfsuite/lib/crf/src/quark.c",
"chaine/_core/crfsuite/lib/crf/src/rumavl.c",
"chaine/_core/crfsuite/lib/crf/src/train_arow.c",
"chaine/_core/crfsuite/lib/crf/src/train_averaged_perceptron.c",
"chaine/_core/crfsuite/lib/crf/src/train_l2sgd.c",
"chaine/_core/crfsuite/lib/crf/src/train_lbfgs.c",
"chaine/_core/crfsuite/lib/crf/src/train_passive_aggressive.c",
"chaine/_core/crfsuite/swig/crfsuite.cpp",
"chaine/_core/liblbfgs/lib/lbfgs.c",
"chaine/_core/trainer_wrapper.cpp",
]
INCLUDE_DIRS = [
"chaine/core/crfsuite/include/",
"chaine/core/crfsuite/lib/cqdb/include",
"chaine/core/liblbfgs/include",
"chaine/core",
"chaine/_core/crfsuite/include/",
"chaine/_core/crfsuite/lib/cqdb/include",
"chaine/_core/liblbfgs/include",
"chaine/_core",
]
EXTENSION = Extension("chaine.core.crf", language="c++", include_dirs=INCLUDE_DIRS, sources=SOURCES)
EXTENSION = Extension("chaine._core.crf", language="c++", include_dirs=INCLUDE_DIRS, sources=SOURCES)


class ExtensionBuilder(build_ext):
Expand Down
2 changes: 1 addition & 1 deletion chaine/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from chaine import crf
from chaine.training import train
from chaine.training import train, optimize
725 changes: 478 additions & 247 deletions chaine/core/crf.cpp → chaine/_core/crf.cpp

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions chaine/core/crf.pyx → chaine/_core/crf.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,11 @@ cdef class Model:
if model.tell() <= 48:
raise ValueError(f"Model file {filepath} does not have a complete header")

def dump(self, filepath: Filepath):
self._tagger.dump(os.open(filepath, os.O_WRONLY | os.O_CREAT))
def dump_transitions(self, filepath: Filepath):
    """Ask the underlying tagger to dump the learned transitions to a file.

    Parameters
    ----------
    filepath : Filepath
        Destination file; it is created if missing and emptied if it
        already exists.
    """
    # O_TRUNC matters: without it, writing a dump that is shorter than an
    # existing file leaves the old tail behind and corrupts the output.
    flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
    # NOTE(review): the raw descriptor is handed to the tagger; confirm that
    # the C++ side closes it once the dump has been written.
    self._tagger.dump_transitions(os.open(str(filepath), flags))

def dump_states(self, filepath: Filepath):
    """Ask the underlying tagger to dump the learned states to a file.

    Parameters
    ----------
    filepath : Filepath
        Destination file; it is created if missing and emptied if it
        already exists.
    """
    # O_TRUNC matters: without it, writing a dump that is shorter than an
    # existing file leaves the old tail behind and corrupts the output.
    flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
    # NOTE(review): the raw descriptor is handed to the tagger; confirm that
    # the C++ side closes it once the dump has been written.
    self._tagger.dump_states(os.open(str(filepath), flags))

cdef crfsuite_api.Item to_item(sequence) except+:
cdef crfsuite_api.Item c_item
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@ extern "C"
#include <stdio.h>
#include <stdarg.h>

/**
/**
* \addtogroup crfsuite_api CRFSuite C API
* @{
*
* The CRFSuite C API provides a low-level library for manipulating
* CRFSuite in C language.
*/

/**
/**
* \addtogroup crfsuite_misc Miscellaneous definitions and functions
* @{
*/
Expand Down Expand Up @@ -323,12 +323,20 @@ extern "C"
int (*get_attrs)(crfsuite_model_t *model, crfsuite_dictionary_t **ptr_attrs);

/**
* Print the model in human-readable format.
* Print the learned transitions as JSON.
* @param model The pointer to this model instance.
* @param fpo The FILE* pointer.
* @return int The status code.
*/
int (*dump)(crfsuite_model_t *model, FILE *fpo);
int (*dump_transitions)(crfsuite_model_t *model, FILE *fpo);

/**
* Print the learned states as JSON.
* @param model The pointer to this model instance.
* @param fpo The FILE* pointer.
* @return int The status code.
*/
int (*dump_states)(crfsuite_model_t *model, FILE *fpo);
};

/**
Expand Down Expand Up @@ -1023,7 +1031,7 @@ extern "C"

/**@}*/

/**
/**
* \addtogroup crfsuite_misc Miscellaneous definitions and functions
* @{
*/
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,8 @@ int crf1dm_get_labelref(crf1dm_t *model, int lid, feature_refs_t *ref);
int crf1dm_get_attrref(crf1dm_t *model, int aid, feature_refs_t *ref);
int crf1dm_get_featureid(feature_refs_t *ref, int i);
int crf1dm_get_feature(crf1dm_t *model, int fid, crf1dm_feature_t *f);
void crf1dm_dump(crf1dm_t *model, FILE *fp);
void crf1dm_dump_states(crf1dm_t *model, FILE *fp);
void crf1dm_dump_transitions(crf1dm_t *model, FILE *fp);

/** @} */

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@

#include <crfsuite.h>
#include "crf1d.h"
#include "json.h"

#define FILEMAGIC "lCRF"
#define MODELTYPE "FOMC"
Expand Down Expand Up @@ -1009,106 +1010,76 @@ int crf1dm_get_feature(crf1dm_t *model, int fid, crf1dm_feature_t *f)
return 0;
}

/**
 * Write the state features of a model to a stream as a JSON array.
 *
 * Every array element is an object with three members: "feature" (the
 * attribute string), "label" (the label string) and "weight" (the feature
 * weight).
 *
 * @param crf1dm The model whose state features are dumped.
 * @param fp     The stream that receives the JSON text.
 */
void crf1dm_dump_states(crf1dm_t *crf1dm, FILE *fp)
{
    int j;
    uint32_t i;
    feature_refs_t refs;
    const header_t *hfile = crf1dm->header;
    char *stringified_json;
    JsonNode *states = json_mkarray();

    /* crf1dm_get_attrref() is keyed by attribute id, so walk all attributes. */
    for (i = 0; i < hfile->num_attrs; ++i)
    {
        crf1dm_get_attrref(crf1dm, i, &refs);
        for (j = 0; j < refs.num_features; ++j)
        {
            crf1dm_feature_t f;
            int fid = crf1dm_get_featureid(&refs, j);
            const char *attr = NULL, *label = NULL;
            JsonNode *state = json_mkobject();

            crf1dm_get_feature(crf1dm, fid, &f);

            /* For a state feature, src is an attribute id and dst a label id. */
            attr = crf1dm_to_attr(crf1dm, f.src);
            label = crf1dm_to_label(crf1dm, f.dst);

            json_append_member(state, "feature", json_mkstring(attr));
            json_append_member(state, "label", json_mkstring(label));
            json_append_member(state, "weight", json_mknumber(f.weight));

            json_append_element(states, state);
        }
    }

    stringified_json = json_stringify(states, " ");
    if (stringified_json != NULL)
    {
        /*
         * The text is data, not a format string: attribute names may contain
         * '%', which fprintf(fp, text) would try to interpret.
         */
        fputs(stringified_json, fp);
        free(stringified_json);
    }

    /* Release the array together with every object appended to it. */
    json_delete(states);
}

/**
 * Write the transition features of a model to a stream as a JSON array.
 *
 * Every array element is an object with three members: "from" (the source
 * label string), "to" (the destination label string) and "weight" (the
 * feature weight).
 *
 * @param crf1dm The model whose transition features are dumped.
 * @param fp     The stream that receives the JSON text.
 */
void crf1dm_dump_transitions(crf1dm_t *crf1dm, FILE *fp)
{
    int j;
    uint32_t i;
    feature_refs_t refs;
    const header_t *hfile = crf1dm->header;
    char *stringified_json;
    JsonNode *transitions = json_mkarray();

    /* crf1dm_get_labelref() is keyed by label id, so walk all labels. */
    for (i = 0; i < hfile->num_labels; ++i)
    {
        crf1dm_get_labelref(crf1dm, i, &refs);
        for (j = 0; j < refs.num_features; ++j)
        {
            crf1dm_feature_t f;
            int fid = crf1dm_get_featureid(&refs, j);
            const char *from = NULL, *to = NULL;
            JsonNode *transition = json_mkobject();

            crf1dm_get_feature(crf1dm, fid, &f);

            /* For a transition feature, src and dst are both label ids. */
            from = crf1dm_to_label(crf1dm, f.src);
            to = crf1dm_to_label(crf1dm, f.dst);

            json_append_member(transition, "from", json_mkstring(from));
            json_append_member(transition, "to", json_mkstring(to));
            json_append_member(transition, "weight", json_mknumber(f.weight));

            json_append_element(transitions, transition);
        }
    }

    stringified_json = json_stringify(transitions, " ");
    if (stringified_json != NULL)
    {
        /*
         * The text is data, not a format string: label names may contain
         * '%', which fprintf(fp, text) would try to interpret.
         */
        fputs(stringified_json, fp);
        free(stringified_json);
    }

    /* Release the array together with every object appended to it. */
    json_delete(transitions);
}
Original file line number Diff line number Diff line change
Expand Up @@ -463,10 +463,17 @@ static int model_get_attrs(crfsuite_model_t *model, crfsuite_dictionary_t **ptr_
return 0;
}

/*
 * Model-interface adapter for dumping transitions: unwraps the CRF1d model
 * kept in model->internal and forwards it, with the output stream, to
 * crf1dm_dump_transitions().
 *
 * Always returns 0; crf1dm_dump_transitions() reports no status.
 */
static int model_dump_transitions(crfsuite_model_t *model, FILE *fpo)
{
    model_internal_t *internal = (model_internal_t *)model->internal;
    crf1dm_dump_transitions(internal->crf1dm, fpo);
    return 0;
}

/*
 * Model-interface adapter for dumping states: unwraps the CRF1d model kept
 * in model->internal and forwards it, with the output stream, to
 * crf1dm_dump_states().
 *
 * Always returns 0; crf1dm_dump_states() reports no status.
 */
static int model_dump_states(crfsuite_model_t *model, FILE *fpo)
{
    crf1dm_dump_states(((model_internal_t *)model->internal)->crf1dm, fpo);

    return 0;
}

Expand Down Expand Up @@ -546,7 +553,9 @@ static int crf1m_model_create(crf1dm_t *crf1dm, void **ptr_model)
model->get_attrs = model_get_attrs;
model->get_labels = model_get_labels;
model->get_tagger = model_get_tagger;
model->dump = model_dump;
model->dump_transitions = model_dump_transitions;
model->dump_states = model_dump_states;


*ptr_model = model;
return 0;
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
Loading

0 comments on commit 3a2626b

Please sign in to comment.